From 14d8eb9e40a6329abcb2f153174b543349c68c13 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 22:17:14 +0100 Subject: autotoken: splicing; splice_optout --- docs/custom_mutators.md | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'docs/custom_mutators.md') diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 4ffeda7a..322caa5b 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -48,6 +48,7 @@ C/C++: ```c void *afl_custom_init(afl_state_t *afl, unsigned int seed); unsigned int afl_custom_fuzz_count(void *data, const unsigned char *buf, size_t buf_size); +void afl_custom_splice_optout(void *data); size_t afl_custom_fuzz(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf, unsigned char *add_buf, size_t add_buf_size, size_t max_size); const char *afl_custom_describe(void *data, size_t max_description_len); size_t afl_custom_post_process(void *data, unsigned char *buf, size_t buf_size, unsigned char **out_buf); @@ -72,6 +73,9 @@ def init(seed): def fuzz_count(buf): return cnt +def splice_optout() + pass + def fuzz(buf, add_buf, max_size): return mutated_out @@ -132,6 +136,13 @@ def deinit(): # optional for Python for a specific queue entry, use this function. This function is most useful if `AFL_CUSTOM_MUTATOR_ONLY` is **not** used. +- `splice_optout` (optional): + + If this function is present, no splicing target is passed to the `fuzz` + function. This saves time if splicing data is not needed by the custom + fuzzing function. + This function is never called, just needs to be present to activate. + - `fuzz` (optional): This method performs custom mutations on a given input. It also accepts an -- cgit 1.4.1 From eeca3a0b2939c605497e9b3a615ee4a466f4a3f2 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 11:52:19 +0100 Subject: lots of fixes --- custom_mutators/autotokens/TODO | 2 +- custom_mutators/autotokens/autotokens.cpp | 424 +++++++++++++++++++----------- docs/custom_mutators.md | 1 + include/afl-fuzz.h | 11 +- src/afl-fuzz-one.c | 3 +- 5 files changed, 279 insertions(+), 162 deletions(-) (limited to 'docs/custom_mutators.md') diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 95b79373..2e39511c 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -3,4 +3,4 @@ cmplog: only add tokens that were found to fit? create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) -splicing -> check if whitespace/token is needed \ No newline at end of file +splicing -> check if whitespace/token is needed diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 4f3289c9..102bea0f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -38,8 +38,10 @@ typedef struct my_mutator { } my_mutator_t; -#define DEBUG \ +#undef DEBUGF +#define DEBUGF \ if (unlikely(debug)) fprintf +#define IFDEBUG if (unlikely(debug)) static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; @@ -57,12 +59,12 @@ static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; +static string output; static regex *regex_comment_custom; -static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); -static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); +static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input u32 good_whitespace_or_singleval() { @@ -104,7 +106,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, MAX(AUTOTOKENS_CHANGE_MIN, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); - // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 #define AUTOTOKENS_MUT_MAX 12 @@ -112,7 +114,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, #define AUTOTOKENS_MUT_MAX 14 #endif - u32 max_rand = AUTOTOKENS_MUT_MAX; + u32 max_rand = AUTOTOKENS_MUT_MAX, new_item, pos; for (i = 0; i < rounds; ++i) { @@ -122,8 +124,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, case 0 ... 7: /* fall through */ { - u32 pos = rand_below(afl_ptr, m_size); - u32 cur_item = m[pos], new_item; + pos = rand_below(afl_ptr, m_size); + u32 cur_item = m[pos]; do { new_item = rand_below(afl_ptr, current_id); @@ -135,7 +137,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ((whitespace_ids < new_item && whitespace_ids >= cur_item) || (whitespace_ids >= new_item && whitespace_ids < cur_item))))); - DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); + DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; break; @@ -144,7 +146,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* INSERT (m_size +1 so we insert also after last place) */ case 8 ... 9: { - u32 new_item; do { new_item = rand_below(afl_ptr, current_id); @@ -154,7 +155,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; - DEBUG(stderr, "INS: %u at %u\n", new_item, pos); + DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); if (likely(!alternative_tokenize)) { @@ -212,7 +213,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + dst_off, src->begin() + src_off, src->begin() + src_off + n); m_size += n; - DEBUG(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + break; } @@ -231,13 +233,36 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, copy(src->begin() + src_off, src->begin() + src_off + n, m.begin() + dst_off); - DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; } } + if (likely(!alternative_tokenize)) { + + // do we need a whitespace/token at the beginning? + if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && + id_to_token[m[dst_off]].size() > 1) { + + m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); + ++m_size; + + } + + // do we need a whitespace/token at the end? + if (dst_off + n < m_size && + id_to_token[m[dst_off + n - 1]].size() > 1 && + id_to_token[m[dst_off + n]].size() > 1) { + + m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); + ++m_size; + + } + + } + break; } @@ -249,11 +274,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, if (m_size > 8) { - m.erase(m.begin() + rand_below(afl_ptr, m_size)); - --m_size; + do { + + pos = rand_below(afl_ptr, m_size); + + } while (unlikely(pos < whitespace_ids)); + + // if what we delete will result in a missing whitespace/token, + // instead of deleting we switch the item to a whitespace or token. + if (likely(!alternative_tokenize) && pos && pos < m_size && + id_to_token[m[pos - 1]].size() > 1 && + id_to_token[m[pos + 1]].size() > 1) { + + m[pos] = good_whitespace_or_singleval(); + + } else { + + m.erase(m.begin() + pos); + --m_size; + + } } else { + // if the data is already too small do not try to make it smaller + // again this run. + max_rand = AUTOTOKENS_MUT_MAX - 2; } @@ -262,14 +308,12 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - // TODO: add full line insert splice, replace splace, delete - } } - string output; - u32 m_size_1 = m_size - 1; + u32 m_size_1 = m_size - 1; + output = ""; for (i = 0; i < m_size; ++i) { @@ -282,31 +326,108 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - u32 mutated_size = output.size(); - u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size); + u32 mutated_size = (u32)output.size(); + u8 *mutated_out = (u8 *)output.data(); - if (unlikely(!mutated_out)) { + if (unlikely(mutated_size > max_size)) { mutated_size = max_size; } - *out_buf = NULL; - return 0; - - } - - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "MUTATED to %u bytes:\n", mutated_size); + DEBUGF(stderr, "MUTATED to %u bytes:\n", mutated_size); fwrite(output.data(), 1, mutated_size, stderr); - DEBUG(stderr, "\n---\n"); + DEBUGF(stderr, "\n---\n"); } - memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; ++fuzz_count; return mutated_size; } +/* I get f*cking stack overflow using C++ regex with a regex of + "\"[[:print:]]*?\"" if this matches a long string even with regex::optimize + enabled :-( */ +u8 my_search_string(string::const_iterator cur, string::const_iterator ende, + string::const_iterator *match_begin, + string::const_iterator *match_end) { + + string::const_iterator start = cur, found_begin; + u8 quote_type = 0; + + while (cur < ende) { + + switch (*cur) { + + case '"': { + + if (cur == start || *(cur - 1) != '\\') { + + if (!quote_type) { + + found_begin = cur; + quote_type = 1; + + } else if (quote_type == 1) { + + *match_begin = found_begin; + *match_end = cur + 1; + return 1; + + } + + } + + break; + + } + + case '\'': { + + if (cur == start || *(cur - 1) != '\\') { + + if (!quote_type) { + + found_begin = cur; + quote_type = 2; + + } else if (quote_type == 2) { + + *match_begin = found_begin; + *match_end = cur + 1; + return 1; + + } + + } + + break; + + } + + case '\n': + case '\r': + case 0: { + + quote_type = 0; + break; + + } + + default: + if (unlikely(quote_type && !isprint(*cur))) { quote_type = 0; } + break; + + } + + ++cur; + + } + + return 0; + +} + /* We are not using afl_custom_queue_new_entry() because not every corpus entry will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ @@ -321,7 +442,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; - DEBUG(stderr, "cmplog not ascii or only_fav and not favorite\n"); + DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); return 0; } @@ -356,7 +477,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++extras_cnt; - DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr); + DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr); } @@ -385,7 +506,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++a_extras_cnt; - DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr); + DEBUGF(stderr, "Added from auto dictionary: \"%s\"\n", ptr); } @@ -415,7 +536,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; - DEBUG(stderr, "Too short (%lu) %s\n", len, filename); + DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 0; } @@ -443,14 +564,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; - DEBUG(stderr, "Not text (%lu) %s\n", len, filename); + DEBUGF(stderr, "Not text (%lu) %s\n", len, filename); return 0; } } - // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", + // DEBUGF(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); if (regex_comment_custom) { @@ -463,15 +584,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), - filename, input.c_str()); + DEBUGF(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), + filename, input.c_str()); u32 spaces = count(input.begin(), input.end(), ' '); u32 tabs = count(input.begin(), input.end(), '\t'); u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; - DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, - linefeeds, ends_with_linefeed); + DEBUGF(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, + linefeeds, ends_with_linefeed); all_spaces += spaces; all_tabs += tabs; all_lf += linefeeds; @@ -479,25 +600,28 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // now extract all tokens vector tokens; - smatch match; - string::const_iterator cur = input.begin(), ende = input.end(), found, prev; + string::const_iterator cur = input.begin(), ende = input.end(), found, prev, + match_begin, match_end; - DEBUG(stderr, "START!\n"); + DEBUGF(stderr, "START!\n"); if (likely(!alternative_tokenize)) { - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + found = match_begin; + cur = match_end; + + IFDEBUG { + + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); + + } if (prev < found) { // there are items between search start and find while (prev < found) { @@ -512,8 +636,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { @@ -525,14 +649,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); + tokens.push_back(string(start, prev)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(prev, prev + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *prev); + tokens.push_back(string(prev, prev + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *prev); ++prev; } @@ -541,11 +665,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + tokens.push_back(string(match_begin, match_end)); + DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); } - DEBUG(stderr, "AFTER all strings\n"); + DEBUGF(stderr, "AFTER all strings\n"); if (cur < ende) { @@ -561,8 +686,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { @@ -575,13 +700,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); } else { tokens.push_back(std::string(cur, cur + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *cur); + DEBUGF(stderr, "OTHER \"%c\"\n", *cur); ++cur; } @@ -593,19 +718,21 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } else { // alternative tokenize - - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + found = match_begin; + cur = match_end; + IFDEBUG { + + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); + + } + if (prev < found) { // there are items between search start and find sregex_token_iterator it{prev, found, regex_whitespace, -1}; vector tokenized{it, {}}; @@ -619,10 +746,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, tokenized.end()); tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); + DEBUGF(stderr, "tokens1: %lu input size: %lu\n", tokenized.size(), + input.size()); for (auto x : tokenized) { cerr << x << endl; @@ -636,10 +763,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (regex_search(c, e, m, regex_word)) { p = c; f = m[0].first; @@ -649,10 +773,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // there are items between search start and find while (p < f) { - if (unlikely(debug)) { + IFDEBUG { string foo(p, p + 1); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); } @@ -661,20 +785,21 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", - foo.c_str()); tokens.push_back(std::string(p, f)); - */ + IFDEBUG { + + string foo(p, f); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } } - DEBUG( - stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); + DEBUGF(stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue " + "at %lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); tokens.push_back(m[0].str()); } @@ -683,10 +808,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (c < e) { - if (unlikely(debug)) { + IFDEBUG { string foo(c, c + 1); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); } @@ -695,17 +820,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", - foo.c_str()); + string foo(c, e); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(c, e)); - */ + tokens.push_back(std::string(c, e)); } @@ -713,7 +835,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + tokens.push_back(string(match_begin, match_end)); } @@ -727,10 +849,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, tokenized.end()); tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); + DEBUGF(stderr, "tokens2: %lu input size: %lu\n", tokenized.size(), + input.size()); for (auto x : tokenized) { cerr << x << endl; @@ -744,10 +866,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (regex_search(c, e, m, regex_word)) { p = c; f = m[0].first; @@ -757,10 +876,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // there are items between search start and find while (p < f) { - if (unlikely(debug)) { + IFDEBUG { string foo(p, p + 1); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); } @@ -769,25 +888,22 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", - foo.c_str()); + string foo(p, f); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(p, f)); - */ + tokens.push_back(std::string(p, f)); } - DEBUG(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); + DEBUGF(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue " + "at %lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); tokens.push_back(m[0].str()); } @@ -796,10 +912,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (c < e) { - if (unlikely(debug)) { + IFDEBUG { string foo(c, c + 1); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); } @@ -808,16 +924,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + string foo(c, e); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(c, e)); - */ + tokens.push_back(std::string(c, e)); } @@ -827,22 +941,22 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "DUMPING TOKENS:\n"); + DEBUGF(stderr, "DUMPING TOKENS:\n"); u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { - DEBUG(stderr, "%s", tokens[i].c_str()); + DEBUGF(stderr, "%s", tokens[i].c_str()); if (unlikely(alternative_tokenize && i < size_1)) { - DEBUG(stderr, "%s", whitespace.c_str()); + DEBUGF(stderr, "%s", whitespace.c_str()); } } - DEBUG(stderr, "---------------------------\n"); + DEBUGF(stderr, "---------------------------\n"); } @@ -850,7 +964,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; - DEBUG(stderr, "too few tokens\n"); + DEBUGF(stderr, "too few tokens\n"); return 0; } @@ -886,21 +1000,23 @@ extern "C" unsigned char afl_custom_queue_get(void *data, all_structure_items += structure->size(); // we are done! - DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", - structure->size()); + DEBUGF(stderr, "DONE! We have %lu tokens in the structure\n", + structure->size()); + + } - } else { + else { if (entry->second == NULL) { - DEBUG(stderr, "Skipping %s\n", filename); + DEBUGF(stderr, "Skipping %s\n", filename); s = NULL; return 0; } s = entry->second; - DEBUG(stderr, "OK %s\n", filename); + DEBUGF(stderr, "OK %s\n", filename); } diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 322caa5b..82131c92 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -150,6 +150,7 @@ def deinit(): # optional for Python sense to use it. You would only skip this if `post_process` is used to fix checksums etc. so if you are using it, e.g., as a post processing library. Note that a length > 0 *must* be returned! + The returned output buffer is under **your** memory management! - `describe` (optional): diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 1e8d085d..229bc025 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -844,15 +844,16 @@ struct custom_mutator { /** * Perform custom mutations on a given input * - * (Optional for now. Required in the future) + * (Optional) * - * @param data pointer returned in afl_custom_init by this custom mutator + * Getting an add_buf can be skipped by using afl_custom_splice_optout(). + * + * @param[in] data Pointer returned in afl_custom_init by this custom mutator * @param[in] buf Pointer to the input data to be mutated and the mutated * output * @param[in] buf_size Size of the input/output data - * @param[out] out_buf the new buffer. We may reuse *buf if large enough. - * *out_buf = NULL is treated as FATAL. - * @param[in] add_buf Buffer containing the additional test case + * @param[out] out_buf The new buffer, under your memory mgmt. + * @param[in] add_buf Buffer containing an additional test case (splicing) * @param[in] add_buf_size Size of the additional test case * @param[in] max_size Maximum size of the mutated output. The mutation must * not produce data larger than max_size. diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 5e352dcb..bd482562 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -564,8 +564,7 @@ u8 fuzz_one_original(afl_state_t *afl) { if (afl->cmplog_lvl == 3 || (afl->cmplog_lvl == 2 && afl->queue_cur->tc_ref) || afl->queue_cur->favored || - !(afl->fsrv.total_execs % afl->queued_items) || - get_cur_time() - afl->last_find_time > 300000) { // 300 seconds + get_cur_time() - afl->last_find_time > 600000) { // 600 seconds if (input_to_state_stage(afl, in_buf, out_buf, len)) { -- cgit 1.4.1 From f756734ad2782c3ed56feadb4b7b23fc82a7a968 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 13 Apr 2023 12:07:27 +0200 Subject: fix attempt at post_process implementation --- docs/Changelog.md | 1 + docs/custom_mutators.md | 9 +++++++-- include/afl-fuzz.h | 9 +++++++-- src/afl-fuzz-python.c | 49 +++++++++---------------------------------------- src/afl-fuzz-run.c | 7 ++++++- 5 files changed, 30 insertions(+), 45 deletions(-) (limited to 'docs/custom_mutators.md') diff --git a/docs/Changelog.md b/docs/Changelog.md index 501300b1..9436fc9f 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -13,6 +13,7 @@ everyone who was affected! - allow pizza mode to be disabled when AFL_PIZZA_MODE is set to -1 - option `-p mmopt` now also selects new queue items more often + - fix bug in post_process custom mutator implementation - print name of custom mutator in UI - afl-cc: - add CFI sanitizer variant to gcc targets diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 82131c92..a1de479e 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -118,7 +118,7 @@ def deinit(): # optional for Python ### Custom Mutation -- `init`: +- `init` (optional in Python): This method is called when AFL++ starts up and is used to seed RNG and set up buffers and state. @@ -184,6 +184,11 @@ def deinit(): # optional for Python to the target, e.g. if it is too short, too corrupted, etc. If so, return a NULL buffer and zero length (or a 0 length string in Python). + NOTE: Do not make any random changes to the data in this function! + + PERFORMANCE for C/C++: If possible make the changes in-place (so modify + the `*data` directly, and return it as `*outbuf = data`. + - `fuzz_send` (optional): This method can be used if you want to send data to the target yourself, @@ -202,7 +207,7 @@ def deinit(): # optional for Python discovered if compiled with INTROSPECTION. The custom mutator can then return a string (const char *) that reports the exact mutations used. -- `deinit`: +- `deinit` (optional in Python): The last method to be called, deinitializing the state. diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 5fd393dd..8b6502b4 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -885,14 +885,19 @@ struct custom_mutator { * A post-processing function to use right before AFL writes the test case to * disk in order to execute the target. * - * (Optional) If this functionality is not needed, simply don't define this + * NOTE: Do not do any random changes to the data in this function! + * + * PERFORMANCE: If you can modify the data in-place you will have a better + * performance. Modify *data and set `*out_buf = data`. + * + * (Optional) If this functionality is not needed, simply do not define this * function. * * @param[in] data pointer returned in afl_custom_init by this custom mutator * @param[in] buf Buffer containing the test case to be executed * @param[in] buf_size Size of the test case * @param[out] out_buf Pointer to the buffer storing the test case after - * processing. External library should allocate memory for out_buf. + * processing. The external library should allocate memory for out_buf. * It can chose to alter buf in-place, if the space is large enough. * @return Size of the output buffer. */ diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c index 673e5a6c..7dad0770 100644 --- a/src/afl-fuzz-python.c +++ b/src/afl-fuzz-python.c @@ -219,11 +219,14 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) { if (py_module != NULL) { - u8 py_notrim = 0, py_idx; - /* init, required */ + u8 py_notrim = 0; py_functions[PY_FUNC_INIT] = PyObject_GetAttrString(py_module, "init"); - if (!py_functions[PY_FUNC_INIT]) - FATAL("init function not found in python module"); + if (!py_functions[PY_FUNC_INIT]) { + + WARNF("init function not found in python module"); + + } + py_functions[PY_FUNC_FUZZ] = PyObject_GetAttrString(py_module, "fuzz"); if (!py_functions[PY_FUNC_FUZZ]) py_functions[PY_FUNC_FUZZ] = PyObject_GetAttrString(py_module, "mutate"); @@ -231,12 +234,6 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) { PyObject_GetAttrString(py_module, "describe"); py_functions[PY_FUNC_FUZZ_COUNT] = PyObject_GetAttrString(py_module, "fuzz_count"); - if (!py_functions[PY_FUNC_FUZZ]) { - - WARNF("fuzz function not found in python module"); - - } - py_functions[PY_FUNC_POST_PROCESS] = PyObject_GetAttrString(py_module, "post_process"); py_functions[PY_FUNC_INIT_TRIM] = @@ -263,36 +260,6 @@ static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) { if (!py_functions[PY_FUNC_DEINIT]) WARNF("deinit function not found in python module"); - for (py_idx = 0; py_idx < PY_FUNC_COUNT; ++py_idx) { - - if (!py_functions[py_idx] || !PyCallable_Check(py_functions[py_idx])) { - - if (py_idx >= PY_FUNC_INIT_TRIM && py_idx <= PY_FUNC_TRIM) { - - // Implementing the trim API is optional for now - if (PyErr_Occurred()) { PyErr_Print(); } - py_notrim = 1; - - } else if (py_idx >= PY_OPTIONAL) { - - // Only _init and _deinit are not optional currently - - if (PyErr_Occurred()) { PyErr_Print(); } - - } else { - - fprintf(stderr, - "Cannot find/call function with index %d in external " - "Python module.\n", - py_idx); - return NULL; - - } - - } - - } - if (py_notrim) { py_functions[PY_FUNC_INIT_TRIM] = NULL; @@ -345,6 +312,8 @@ static void init_py(afl_state_t *afl, py_mutator_t *py_mutator, (void)afl; + if (py_mutator->py_functions[PY_FUNC_INIT] == NULL) { return; } + PyObject *py_args, *py_value; /* Provide the init function a seed for the Python RNG */ diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index f5425011..26e8549d 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -133,7 +133,12 @@ write_to_testcase(afl_state_t *afl, void **mem, u32 len, u32 fix) { } - if (new_mem != *mem) { *mem = new_mem; } + if (new_mem != *mem && new_mem != NULL && new_size > 0) { + + *mem = afl_realloc((void **)mem, new_size); + memmove(*mem, new_mem, new_size); + + } if (unlikely(afl->custom_mutators_count)) { -- cgit 1.4.1