From 9548af52b266ecc2aed81f388f7a1a7a3fcfb181 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Sat, 14 Jan 2023 09:30:25 +0100
Subject: texts

---
 custom_mutators/autotokens/README | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 custom_mutators/autotokens/README

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
new file mode 100644
index 00000000..6849279e
--- /dev/null
+++ b/custom_mutators/autotokens/README
@@ -0,0 +1,12 @@
+# autotokens
+
+This implements an improved autotoken idea presented in
+[Token-Level Fuzzing](https://www.usenix.org/system/files/sec21-salls.pdf).
+It is a grammar fuzzer without actually knowing the grammar.
+
+It is recommended to run it together in an instance with `CMPLOG`.
+
+If you have a dictionary (`-x`), this improves this custom grammar mutator.
+
+If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`,
+to concentrate on grammar bug classes.
--
cgit 1.4.1


From 33f41e3974348d3b0b71b3a30a6483bb0418068c Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Tue, 17 Jan 2023 09:52:35 +0100
Subject: autotokens: print stats at exit

---
 custom_mutators/autotokens/README         |  7 ++++---
 custom_mutators/autotokens/autotokens.cpp | 12 ++++++++++++
 include/config.h                          |  4 ++--
 3 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 6849279e..0dcc6a3e 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -1,6 +1,6 @@
 # autotokens
 
-This implements an improved autotoken idea presented in
+This implements an improved autotoken grammar fuzzing idea presented in
 [Token-Level Fuzzing](https://www.usenix.org/system/files/sec21-salls.pdf).
 It is a grammar fuzzer without actually knowing the grammar.
 
@@ -8,5 +8,6 @@ It is recommended to run it together in an instance with `CMPLOG`.
 
 If you have a dictionary (`-x`), this improves this custom grammar mutator.
 
-If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`,
-to concentrate on grammar bug classes.
+If **not** running with `CMPLOG`, it is possible to set
+`AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes.
+
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index 850692a1..d6b269fd 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -35,6 +35,7 @@ static u32 valid_structures;
 static u32 whitespace_ids;
 static u32 extras_cnt, a_extras_cnt;
 static u64 all_spaces, all_tabs, all_lf, all_ws;
+static u64 all_structure_items;
 static unordered_map<string, vector<u32> *> file_mapping;
 static unordered_map<string, u32>           token_to_id;
 static unordered_map<u32, string>           id_to_token;
@@ -519,6 +520,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
     file_mapping[fn] = structure;
     s = structure;
     ++valid_structures;
+    all_structure_items += structure->size();
 
     // we are done!
     DEBUG(stderr, "DONE! We have %lu tokens in the structure\n",
@@ -586,6 +588,16 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
 extern "C" void afl_custom_deinit(my_mutator_t *data) {
 
+  /* we use this to print statistics at exit :-)
+     needs to be stderr as stdout is filtered */
+
+  fprintf(stderr,
+          "\n\nAutotoken mutator statistics:\n"
+          "  Number of all seen tokens:  %lu\n"
+          "  Number of input structures: %lu\n"
+          "  Number of all items in structures: %lu\n\n",
+          current_id - 1, valid_structures, all_structure_items);
+
   free(data);
 
 }
diff --git a/include/config.h b/include/config.h
index 6cfaac11..f8a742f2 100644
--- a/include/config.h
+++ b/include/config.h
@@ -364,9 +364,9 @@
  *                                                          *
  ***********************************************************/
 
-/* Call count interval between reseeding the libc PRNG from /dev/urandom: */
+/* Call count interval between reseeding the PRNG from /dev/urandom: */
 
-#define RESEED_RNG 100000
+#define RESEED_RNG 2500000
 
 /* The default maximum testcase cache size in MB, 0 = disable.
    A value between 50 and 250 is a good default value. Note that the
--
cgit 1.4.1
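For context on the `RESEED_RNG` change in the commit above: afl-fuzz reseeds
its PRNG from /dev/urandom after a fixed number of calls, so raising the
interval from 100000 to 2500000 simply makes reseeding 25x rarer and cuts the
associated syscall overhead. A minimal C++ sketch of that call-count pattern,
for illustration only (the names and layout here are invented and are not the
actual afl-fuzz implementation):

    #include <cstdio>
    #include <cstdlib>

    #define RESEED_RNG 2500000           // the new, larger interval

    static unsigned long long rand_cnt;  // calls since the last reseed

    static unsigned int rand_next(void) {

      if (++rand_cnt >= RESEED_RNG) {

        unsigned int seed = 0;
        FILE *f = fopen("/dev/urandom", "rb");
        if (f) {

          // pull fresh entropy and reseed the libc PRNG
          if (fread(&seed, sizeof(seed), 1, f) == 1) { srandom(seed); }
          fclose(f);

        }

        rand_cnt = 0;

      }

      return (unsigned int)random();

    }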
From a41fd5cc5c4a5073f38adf06270e2985c88da9d5 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Wed, 18 Jan 2023 11:46:28 +0100
Subject: alternate tokenize, options

---
 custom_mutators/autotokens/README         |   9 +
 custom_mutators/autotokens/autotokens.cpp | 432 ++++++++++++++++++++++++------
 2 files changed, 365 insertions(+), 76 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 0dcc6a3e..f6e9c753 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -11,3 +11,12 @@ If you have a dictionary (`-x`), this improves this custom grammar mutator.
 
 If **not** running with `CMPLOG`, it is possible to set
 `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes.
+
+## Configuration via environment variables
+
+`AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items
+`AUTOTOKENS_COMMENT` - what character or string starts a comment which will be
+                       removed. Default: `/* ... */`
+`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation
+                                    (experimental)
+`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE,
+                          default is " "
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index 5580512a..28ef91e2 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -15,7 +15,10 @@ extern "C" {
 
 #include
 
 #define AUTOTOKENS_DEBUG 0
+#define AUTOTOKENS_ONLY_FAV 0
+#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0
 #define AUTOTOKENS_CHANGE_MIN 8
+#define AUTOTOKENS_WHITESPACE " "
 
 using namespace std;
 
@@ -30,6 +33,8 @@ typedef struct my_mutator {
 
 static afl_state *afl_ptr;
 static int        debug = AUTOTOKENS_DEBUG;
+static int        only_fav = AUTOTOKENS_ONLY_FAV;
+static int        alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE;
 static u32        current_id;
 static u32        valid_structures;
 static u32        whitespace_ids;
@@ -39,9 +44,12 @@ static u64 all_structure_items;
 static unordered_map<string, vector<u32> *> file_mapping;
 static unordered_map<string, u32>           token_to_id;
 static unordered_map<u32, string>           id_to_token;
-// static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize);
+static string whitespace = AUTOTOKENS_WHITESPACE;
+static regex *regex_comment_custom;
 static regex  regex_comment_star("/\\*([:print:]|\n)*?\\*/",
                                  regex::multiline | regex::optimize);
+static regex  regex_word("[A-Za-z0-9_$]+", regex::optimize);
+static regex  regex_whitespace(R"([ \t]+)", regex::optimize);
 static regex  regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize);
 static vector<u32> *s;  // the structure of the currently selected input
 
@@ -84,15 +92,15 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
                      afl_ptr->havoc_div / 256));
   // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
 
-  u32 max_rand = 4;
+  u32 max_rand = 7;
 
   for (i = 0; i < rounds; ++i) {
 
     switch (rand_below(afl_ptr, max_rand)) {
 
       /* CHANGE */
-      case 0: /* fall through */
-      case 1: {
+      case 0 ... 3: /* fall through */
+      {
 
         u32 pos = rand_below(afl_ptr, m_size);
         u32 cur_item = m[pos], new_item;
@@ -103,8 +111,9 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
         } while (unlikely(
 
             new_item == cur_item ||
-            (whitespace_ids < new_item && whitespace_ids >= cur_item) ||
-            (whitespace_ids >= new_item && whitespace_ids < cur_item)));
+            (!alternative_tokenize &&
+             ((whitespace_ids < new_item && whitespace_ids >= cur_item) ||
+              (whitespace_ids >= new_item && whitespace_ids < cur_item)))));
 
         DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item);
         m[pos] = new_item;
@@ -113,7 +122,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
       }
 
       /* INSERT (m_size +1 so we insert also after last place) */
-      case 2: {
+      case 4 ... 5: {
 
         u32 new_item;
         do {
@@ -126,26 +135,30 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
         m.insert(m.begin() + pos, new_item);
         ++m_size;
 
-        // if we insert an identifier or string we might need whitespace
-        if (id_to_token[new_item].size() > 1) {
+        if (likely(!alternative_tokenize)) {
 
-          // need to insert before?
+          // if we insert an identifier or string we might need whitespace
+          if (id_to_token[new_item].size() > 1) {
- m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { - } + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; + + } + + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { + // need to insert after? - // need to insert after? + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; + } } @@ -156,7 +169,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* ERASE - only if large enough */ - case 3: { + case 6: { if (m_size > 8) { @@ -165,7 +178,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 3; + max_rand = 6; } @@ -180,10 +193,16 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } string output; + u32 m_size_1 = m_size - 1; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; + if (unlikely(alternative_tokenize && i < m_size_1)) { + + output += whitespace; + + } } @@ -219,7 +238,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (likely(!debug)) { - if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) { + if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; return 0; @@ -353,8 +373,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); - // input = regex_replace(input, regex_comment_slash, "$2"); - input = regex_replace(input, regex_comment_star, ""); + if (regex_comment_custom) { + + input = regex_replace(input, *regex_comment_custom, "$2"); + + } else { + + input = regex_replace(input, regex_comment_star, ""); + + } DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), filename, input.c_str()); @@ -377,53 +404,105 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "START!\n"); - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + if (likely(!alternative_tokenize)) { + + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { + + prev = cur; + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); + + if (prev < found) { // there are items between search start and find + while (prev < found) { - prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + if (isspace(*prev)) { - if (prev < found) { // there are items between search start and find - while (prev < found) { + auto start = prev; + while (isspace(*prev)) { - if (isspace(*prev)) { + ++prev; - auto start = prev; - while (isspace(*prev)) { + } + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + 
+                  tokens[tokens.size() - 1].c_str());
 
-        } else if (isalnum(*prev) || *prev == '$' || *prev == '_') {
+          } else if (isalnum(*prev) || *prev == '$' || *prev == '_') {
 
-          auto start = prev;
-          while (isalnum(*prev) || *prev == '$' || *prev == '_' ||
-                 *prev == '.' || *prev == '/') {
+            auto start = prev;
+            while (isalnum(*prev) || *prev == '$' || *prev == '_' ||
+                   *prev == '.' || *prev == '/') {
 
-            ++prev;
+              ++prev;
 
-          }
+            }
 
-          tokens.push_back(std::string(start, prev));
-          DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start,
-                tokens[tokens.size() - 1].c_str());
+            tokens.push_back(std::string(start, prev));
+            DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start,
+                  tokens[tokens.size() - 1].c_str());
 
-        } else {
+          } else {
 
-          tokens.push_back(std::string(prev, prev + 1));
-          DEBUG(stderr, "OTHER \"%c\"\n", *prev);
-          ++prev;
+            tokens.push_back(std::string(prev, prev + 1));
+            DEBUG(stderr, "OTHER \"%c\"\n", *prev);
+            ++prev;
 
-        }
+          }
 
-      }
+        }
 
-    }
+      }
 
-    if (match[0].length() > 0) { tokens.push_back(match[0]); }
+      if (match[0].length() > 0) { tokens.push_back(match[0]); }
 
-  }
+    }
 
-  DEBUG(stderr, "AFTER all strings\n");
+    DEBUG(stderr, "AFTER all strings\n");
 
-  if (cur < ende) {
+    if (cur < ende) {
 
-    while (cur < ende) {
+      while (cur < ende) {
 
-      if (isspace(*cur)) {
+        if (isspace(*cur)) {
 
-        auto start = cur;
-        while (isspace(*cur)) {
+          auto start = cur;
+          while (isspace(*cur)) {
 
-          ++cur;
+            ++cur;
 
-        }
+          }
 
-        tokens.push_back(std::string(start, cur));
-        DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start,
-              tokens[tokens.size() - 1].c_str());
+          tokens.push_back(std::string(start, cur));
+          DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start,
+                tokens[tokens.size() - 1].c_str());
 
-      } else if (isalnum(*cur) || *cur == '$' || *cur == '_') {
+        } else if (isalnum(*cur) || *cur == '$' || *cur == '_') {
 
-        auto start = cur;
-        while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' ||
-               *cur == '/') {
+          auto start = cur;
+          while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' ||
+                 *cur == '/') {
 
-          ++cur;
+            ++cur;
 
-        }
+          }
 
-        tokens.push_back(std::string(start, cur));
-        DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start,
-              tokens[tokens.size() - 1].c_str());
+          tokens.push_back(std::string(start, cur));
+          DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start,
+                tokens[tokens.size() - 1].c_str());
 
-      } else {
+        } else {
 
-        tokens.push_back(std::string(cur, cur + 1));
-        DEBUG(stderr, "OTHER \"%c\"\n", *cur);
-        ++cur;
+          tokens.push_back(std::string(cur, cur + 1));
+          DEBUG(stderr, "OTHER \"%c\"\n", *cur);
+          ++cur;
 
+        }
+
       }
 
     }
 
+  } else {
+
+    // alternative tokenize
+
+    while (regex_search(cur, ende, match, regex_string)) {
+
+      prev = cur;
+      found = match[0].first;
+      cur = match[0].second;
+      DEBUG(stderr,
+            "string %s found at start %lu offset %lu continue at %lu\n",
+            match[0].str().c_str(), prev - input.begin(), match.position(),
+            cur - input.begin());
+      if (prev < found) {  // there are items between search start and find
+
+        sregex_token_iterator it{prev, found, regex_whitespace, -1};
+        vector<string>        tokenized{it, {}};
+        tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(),
+                                       [](std::string const &s) {
+
+                                         return s.size() == 0;
+
+                                       }),
+                        tokenized.end());
+        tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
+
+        if (unlikely(debug)) {
+
+          DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(),
+                input.size());
+          for (auto x : tokenized) {
+
+            cerr << x << endl;
+
+          }
+
+        }
+
+        for (auto token : tokenized) {
+
+          string::const_iterator c = token.begin(), e = token.end(), f, p;
+          smatch                 m;
+
+          while (regex_search(c, e, m, regex_word)) {
+
+            p = c;
+            f = m[0].first;
+            c = m[0].second;
+            if (p < f) {
+
+              // there are items between search start and find
+              while (p < f) {
+
+                if (unlikely(debug)) {
+
+                  string foo(p, p + 1);
+                  DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+
+                }
+
+                tokens.push_back(std::string(p, p + 1));
+                ++p;
+
+              }
+
+              /*
+              string foo(p, f);
+              DEBUG(stderr, "before string: \"%s\"\n",
+              foo.c_str()); tokens.push_back(std::string(p, f));
+              */
+
+            }
+
+            DEBUG(
+                stderr,
+                "SUBstring \"%s\" found at start %lu offset %lu continue at "
+                "%lu\n",
+                m[0].str().c_str(), p - input.begin(), m.position(),
+                c - token.begin());
+            tokens.push_back(m[0].str());
+
+          }
+
+          if (c < e) {
+
+            while (c < e) {
+
+              if (unlikely(debug)) {
+
+                string foo(c, c + 1);
+                DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+
+              }
+
+              tokens.push_back(std::string(c, c + 1));
+              ++c;
+
+            }
+
+            /*
+            if (unlikely(debug)) {
+
+              string foo(c, e);
+              DEBUG(stderr, "after string: \"%s\"\n",
+              foo.c_str());
+
+            }
+
+            tokens.push_back(std::string(c, e));
+            */
+
+          }
+
+        }
+
+      }
+
+      if (match[0].length() > 0) { tokens.push_back(match[0]); }
+
+    }
+
+    if (cur < ende) {
+
+      sregex_token_iterator it{cur, ende, regex_whitespace, -1};
+      vector<string>        tokenized{it, {}};
+      tokenized.erase(
+          std::remove_if(tokenized.begin(), tokenized.end(),
+                         [](std::string const &s) { return s.size() == 0; }),
+          tokenized.end());
+      tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
+
+      if (unlikely(debug)) {
+
+        DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(),
+              input.size());
+        for (auto x : tokenized) {
+
+          cerr << x << endl;
+
+        }
+
+      }
+
+      for (auto token : tokenized) {
+
+        string::const_iterator c = token.begin(), e = token.end(), f, p;
+        smatch                 m;
+
+        while (regex_search(c, e, m, regex_word)) {
+
+          p = c;
+          f = m[0].first;
+          c = m[0].second;
+          if (p < f) {
+
+            // there are items between search start and find
+            while (p < f) {
+
+              if (unlikely(debug)) {
+
+                string foo(p, p + 1);
+                DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+
+              }
+
+              tokens.push_back(std::string(p, p + 1));
+              ++p;
+
+            }
+
+            /*
+            if (unlikely(debug)) {
+
+              string foo(p, f);
+              DEBUG(stderr, "before string: \"%s\"\n",
+              foo.c_str());
+
+            }
+
+            tokens.push_back(std::string(p, f));
+            */
+
+          }
+
+          DEBUG(stderr,
+                "SUB2string \"%s\" found at start %lu offset %lu continue at "
+                "%lu\n",
+                m[0].str().c_str(), p - input.begin(), m.position(),
+                c - token.begin());
+          tokens.push_back(m[0].str());
+
+        }
+
+        if (c < e) {
+
+          while (c < e) {
+
+            if (unlikely(debug)) {
+
+              string foo(c, c + 1);
+              DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+
+            }
+
+            tokens.push_back(std::string(c, c + 1));
+            ++c;
+
+          }
+
+          /*
+          if (unlikely(debug)) {
+
+            string foo(c, e);
+            DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+
+          }
+
+          tokens.push_back(std::string(c, e));
+          */
+
+        }
+
+      }
 
     }
 
   }
 
@@ -483,9 +741,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
   if (unlikely(debug)) {
 
     DEBUG(stderr, "DUMPING TOKENS:\n");
+    u32 size_1 = tokens.size() - 1;
     for (u32 i = 0; i < tokens.size(); ++i) {
 
       DEBUG(stderr, "%s", tokens[i].c_str());
+      if (unlikely(alternative_tokenize && i < size_1)) {
+
+        DEBUG(stderr, "%s", whitespace.c_str());
+
+      }
 
     }
 
@@ -556,6 +820,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
   }
 
+  if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; }
+  if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; }
+  if (getenv("AUTOTOKENS_WHITESPACE")) {
+
+    whitespace = getenv("AUTOTOKENS_WHITESPACE");
+
+  }
+
+  if (getenv("AUTOTOKENS_COMMENT")) {
+
+    char buf[256];
+    snprintf(buf, sizeof(buf), "(%s.*)([\r\n]?)", getenv("AUTOTOKENS_COMMENT"));
+    regex_comment_custom = new regex(buf, regex::optimize);
+
+  }
+
   data->afl = afl_ptr = afl;
 
   // set common whitespace tokens
--
cgit 1.4.1
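The commit above introduces two tokenizers: the default walks the input
keeping quoted strings intact and splitting everything else into whitespace
runs, identifier-like words and single punctuation characters, while the
experimental alternative first splits on whitespace and then extracts words
with `regex_word`. A compact, simplified C++ sketch of that second, two-stage
idea, for illustration only (this is not the repository code and skips the
string and comment handling):

    #include <regex>
    #include <string>
    #include <vector>
    using namespace std;

    // stage 1: split on whitespace; stage 2: split each piece into
    // words matching [A-Za-z0-9_$]+ and single non-word characters
    static vector<string> tokenize_alt(const string &input) {

      static const regex ws(R"([ \t]+)"), word("[A-Za-z0-9_$]+");
      vector<string>        tokens;
      sregex_token_iterator piece(input.begin(), input.end(), ws, -1), end;

      for (; piece != end; ++piece) {

        string token = *piece;
        if (token.empty()) { continue; }
        smatch                 m;
        string::const_iterator c = token.cbegin(), e = token.cend();

        while (regex_search(c, e, m, word)) {

          for (auto p = c; p != m[0].first; ++p)
            tokens.push_back(string(1, *p));  // punctuation before the word
          tokens.push_back(m[0].str());       // the word itself
          c = m[0].second;

        }

        for (; c != e; ++c)
          tokens.push_back(string(1, *c));    // trailing punctuation

      }

      return tokens;

    }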
From 17752465e6b3c70fd0104fae7bb1f84c1cb8bb66 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Wed, 18 Jan 2023 22:31:55 +0100
Subject: nit

---
 custom_mutators/autotokens/README         | 2 ++
 custom_mutators/autotokens/TODO           | 8 +-------
 custom_mutators/autotokens/autotokens.cpp | 7 ++-----
 3 files changed, 5 insertions(+), 12 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index f6e9c753..f82dcd98 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -11,6 +11,8 @@ If you have a dictionary (`-x`), this improves this custom grammar mutator.
 If **not** running with `CMPLOG`, it is possible to set
 `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes.
 
+Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
+
 ## Configuration via environment variables
 
 `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO
index 2e5e384f..95b79373 100644
--- a/custom_mutators/autotokens/TODO
+++ b/custom_mutators/autotokens/TODO
@@ -1,12 +1,6 @@
-keep whitespace as-is or record it? MAYBE
-0=space 1=tab 2=linefeed
-
 cmplog: only add tokens that were found to fit?
 
 create from thin air if no good seed after a cycle and dict large enough?
 (static u32 no_of_struct_inputs;)
 
-splice insert, splice overwrite
-(linefeed, semicolon)
-
-
+splicing -> check if whitespace/token is needed
\ No newline at end of file
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index c9ec4352..5e683455 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -217,11 +217,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
                     MIN(m_size - dst_off - AUTOTOKENS_SPLICE_MIN,
                         src_size - src_off - AUTOTOKENS_SPLICE_MIN)));
 
-        for (u32 i = 0; i < n; ++i) {
-
-          m[dst_off + i] = (*src)[src_off + i];
-
-        }
+        copy(src->begin() + src_off, src->begin() + src_off + n,
+             m.begin() + dst_off);
 
         DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off);
         break;
--
cgit 1.4.1


From ec87abda93d68f489f26ed2a2ae75b4f1e26d0bb Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Sat, 4 Feb 2023 14:37:28 +0100
Subject: readme

---
 custom_mutators/autotokens/README | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index f82dcd98..86e7c9b3 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -18,6 +18,12 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
 `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items
 `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be
                        removed. Default: `/* ... */`
+`AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the amount of fuzzing performed, shifting
+                                the value by this number set, e.g. 1.
+`AUTOTOKENS_LEARN_DICT` - learn from dictionaries?
+                          0 = none
+                          1 = only -x or autodict
+                          2 = -x, autodict and `CMPLOG`
 `AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation
                                     (experimental)
 `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE,
--
cgit 1.4.1


From e6120282556e4df79c01236849e5f6f225b8e428 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Sun, 5 Feb 2023 14:19:10 +0100
Subject: dict fix

---
 custom_mutators/autotokens/README         |  3 +++
 custom_mutators/autotokens/autotokens.cpp | 22 +++++++++++++++-------
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 86e7c9b3..d8613232 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -24,6 +24,9 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
                           0 = none
                           1 = only -x or autodict
                           2 = -x, autodict and `CMPLOG`
+`AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and
+                                    a dictionary is loaded, then create one
+                                    initial structure based on the dictionary.
 `AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation
                                     (experimental)
 `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE,
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index f1263600..d3ae7e9c 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -29,7 +29,7 @@ extern "C" {
 #define AUTOTOKENS_SIZE_MIN 8
 #define AUTOTOKENS_SPLICE_MIN 4
 #define AUTOTOKENS_SPLICE_MAX 64
-#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1
+#define AUTOTOKENS_CREATE_FROM_THIN_AIR 0
 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0
 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog
 #define AUTOTOKENS_LEARN_DICT 2
@@ -506,14 +506,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
   }
 
   // check if there are new dictionary entries and add them to the tokens
-  if (valid_structures && learn_state < learn_dictionary_tokens) {
+  if (likely(valid_structures || create_from_thin_air) &&
+      learn_state < learn_dictionary_tokens) {
 
     if (unlikely(!learn_state)) { learn_state = 1; }
 
     while (extras_cnt < afl_ptr->extras_cnt) {
 
       u32 ok = 1, l = afl_ptr->extras[extras_cnt].len;
-      u8 *ptr = afl_ptr->extras[extras_cnt].data;
+      u8 *buf, *ptr = afl_ptr->extras[extras_cnt].data;
 
       for (u32 i = 0; i < l; ++i) {
 
@@ -528,14 +529,17 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
 
       if (ok) {
 
-        token_to_id[(char *)ptr] = current_id;
-        id_to_token[current_id] = (char *)ptr;
+        buf = (u8 *)malloc(afl_ptr->extras[extras_cnt].len + 1);
+        memcpy(buf, afl_ptr->extras[extras_cnt].data,
+               afl_ptr->extras[extras_cnt].len);
+        buf[afl_ptr->extras[extras_cnt].len] = 0;
+        token_to_id[(char *)buf] = current_id;
+        id_to_token[current_id] = (char *)buf;
         ++current_id;
 
       }
 
       ++extras_cnt;
       DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr);
 
     }
 
@@ -600,8 +604,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
 
   }
 
-  file_mapping[fn] = structure;
   s = structure;
+  file_mapping[fn] = structure;
+  id_mapping[valid_structures] = structure;
+
+  ++valid_structures;
+  all_structure_items += structure->size();
+
   return 1;
 
 }
--
cgit 1.4.1
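A note on the cpp part of the "dict fix" commit above: AFL++ dictionary
extras are length-delimited byte buffers, not NUL-terminated C strings, so
the old `token_to_id[(char *)ptr]` could read past the end of an entry while
building the `std::string` map key. The patch therefore copies each entry
into a freshly allocated, NUL-terminated buffer first. In miniature
(illustrative C++ sketch only; `extra_t` here is a stand-in for the real
extras struct):

    #include <cstdlib>
    #include <cstring>
    #include <string>

    struct extra_t {           // stand-in for an AFL++ dictionary entry
      unsigned char *data;     // raw bytes, NOT NUL-terminated
      unsigned int   len;
    };

    // wrong: std::string((char *)e.data) scans for a NUL that may not exist.
    // right: copy with an explicit length, as the patch does:
    static char *dup_extra(const extra_t &e) {

      char *buf = (char *)malloc(e.len + 1);
      if (!buf) { return nullptr; }
      memcpy(buf, e.data, e.len);
      buf[e.len] = 0;           // now safe to use as a map key
      return buf;

    }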
From 8a2547073c500fcd637a7b276b7a38313bb70b5f Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Mon, 6 Feb 2023 08:51:20 +0100
Subject: more options

---
 custom_mutators/autotokens/README         |  2 ++
 custom_mutators/autotokens/TODO           |  4 +++-
 custom_mutators/autotokens/autotokens.cpp | 26 ++++++++++++++++++++++----
 3 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index d8613232..e9c48662 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -24,6 +24,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
                           0 = none
                           1 = only -x or autodict
                           2 = -x, autodict and `CMPLOG`
+`AUTOTOKENS_CHANGE_MIN` - minimum number of mutations (1-256, default 8)
+`AUTOTOKENS_CHANGE_MAX` - maximum number of mutations (1-4096, default 64)
 `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and
                                     a dictionary is loaded, then create one
                                     initial structure based on the dictionary.
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO
index 528dff1f..496bfd45 100644
--- a/custom_mutators/autotokens/TODO
+++ b/custom_mutators/autotokens/TODO
@@ -9,7 +9,6 @@ analyze which have a DICT, and which of those are pure ascii
 corpus analysis:
 + libxml
-  - hardbuzz
 - sqlite
 - libpcap
 min len, max len, % when 95/98/99/100 ascii
@@ -20,3 +19,6 @@ AFL_TXT_MAX_LEN 65535
 AFL_TXT_MIN_LEN 16
 AFL_TXT_MIN_PERCENT=99
 
+-> NO FAV!
+
+change_min/_max values
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index d3ae7e9c..ee35c68b 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -32,7 +32,7 @@ extern "C" {
 #define AUTOTOKENS_CREATE_FROM_THIN_AIR 0
 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0
 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog
-#define AUTOTOKENS_LEARN_DICT 2
+#define AUTOTOKENS_LEARN_DICT 1
 #ifndef AUTOTOKENS_SPLICE_DISABLE
 #define AUTOTOKENS_SPLICE_DISABLE 0
 #endif
@@ -64,6 +64,8 @@ static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT;
 static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT;
 static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR;
+static int change_min = AUTOTOKENS_CHANGE_MIN;
+static int change_max = AUTOTOKENS_CHANGE_MAX;
 static u32 current_id;
 static u32 valid_structures;
 static u32 whitespace_ids;
@@ -151,8 +153,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
 
   u32 i, m_size = (u32)m.size();
 
   u32 rounds =
-      MIN(AUTOTOKENS_CHANGE_MAX,
-          MAX(AUTOTOKENS_CHANGE_MIN,
+      MIN(change_max,
+          MAX(change_min,
               MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score *
                                    afl_ptr->havoc_div / 256)));
   // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
@@ -1162,7 +1164,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
     learn_dictionary_tokens = atoi(getenv("AUTOTOKENS_LEARN_DICT"));
     if (learn_dictionary_tokens < 0 || learn_dictionary_tokens > 2) {
 
-      learn_dictionary_tokens = 2;
+      learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT;
 
     }
 
@@ -1175,6 +1177,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
   }
 
+  if (getenv("AUTOTOKENS_CHANGE_MIN")) {
+
+    change_min = atoi(getenv("AUTOTOKENS_CHANGE_MIN"));
+    if (change_min < 1 || change_min > 256) { change_min = AUTOTOKENS_CHANGE_MIN; }
+
+  }
+
+  if (getenv("AUTOTOKENS_CHANGE_MAX")) {
+
+    change_max = atoi(getenv("AUTOTOKENS_CHANGE_MAX"));
+    if (change_max < 1 || change_max > 4096) { change_max = AUTOTOKENS_CHANGE_MAX; }
+
+  }
+
+  if (change_max < change_min) { change_max = change_min + 1; }
+
   if (getenv("AUTOTOKENS_WHITESPACE")) {
 
     whitespace = getenv("AUTOTOKENS_WHITESPACE");
--
cgit 1.4.1


From 61439859cece05cd3e204af60bb5ff08556c490d Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Mon, 13 Feb 2023 08:26:30 +0100
Subject: cleanup

---
 custom_mutators/autotokens/README         | 4 ----
 custom_mutators/autotokens/autotokens.cpp | 8 --------
 2 files changed, 12 deletions(-)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index e9c48662..904b5fa3 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -29,7 +29,3 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
 `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and
                                     a dictionary is loaded, then create one
                                     initial structure based on the dictionary.
-`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation
-                                    (experimental)
-`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE,
-                          default is " "
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index ca738d0b..10afa2c2 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -24,7 +24,6 @@ extern "C" {
 #define AUTOTOKENS_ONLY_FAV 0
 #define AUTOTOKENS_CHANGE_MIN 8
 #define AUTOTOKENS_CHANGE_MAX 64
-#define AUTOTOKENS_WHITESPACE " "
 #define AUTOTOKENS_SIZE_MIN 8
 #define AUTOTOKENS_SPLICE_MIN 4
 #define AUTOTOKENS_SPLICE_MAX 64
@@ -75,7 +74,6 @@ static unordered_map<string, vector<u32> *> file_mapping;
 static unordered_map<u32, vector<u32> *>    id_mapping;
 static unordered_map<string, u32>           token_to_id;
 static unordered_map<u32, string>           id_to_token;
-static string whitespace = AUTOTOKENS_WHITESPACE;
 static string output;
 static regex *regex_comment_custom;
 // multiline requires g++-11 libs :(
@@ -913,12 +911,6 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
 
   if (change_max < change_min) { change_max = change_min + 1; }
 
-  if (getenv("AUTOTOKENS_WHITESPACE")) {
-
-    whitespace = getenv("AUTOTOKENS_WHITESPACE");
-
-  }
-
   if (getenv("AUTOTOKENS_COMMENT")) {
 
     char buf[256];
--
cgit 1.4.1


From 5a0100c6eece0d668c7040ec6e6ed3f59ef0d1ba Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Mon, 13 Feb 2023 10:01:02 +0100
Subject: add to readme

---
 custom_mutators/autotokens/README | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'custom_mutators/autotokens/README')

diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 904b5fa3..295cd736 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -20,6 +20,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
                        removed. Default: `/* ... */`
 `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the amount of fuzzing performed, shifting
                                 the value by this number set, e.g. 1.
+`AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii
+                            (or no input and no (ascii) dictionary)
 `AUTOTOKENS_LEARN_DICT` - learn from dictionaries?
                           0 = none
                           1 = only -x or autodict
                           2 = -x, autodict and `CMPLOG`
--
cgit 1.4.1
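The commits above repeat one pattern several times: read an environment
variable, convert it with `atoi()`, and fall back to the compile-time default
when the value is out of range. As a generic helper that same logic could
look like this (C++ sketch only; no such helper exists in the repository):

    #include <cstdlib>

    static int env_int_clamped(const char *name, int def, int lo, int hi) {

      const char *v = getenv(name);
      if (!v) { return def; }
      int x = atoi(v);
      if (x < lo || x > hi) { return def; }  // out of range -> use default
      return x;

    }

    // hypothetical usage, mirroring AUTOTOKENS_CHANGE_MIN/MAX handling:
    //   change_min = env_int_clamped("AUTOTOKENS_CHANGE_MIN", 8, 1, 256);
    //   change_max = env_int_clamped("AUTOTOKENS_CHANGE_MAX", 64, 1, 4096);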
From add2eb42c0f0e2b590fcb17427e5fce29c2fdd54 Mon Sep 17 00:00:00 2001
From: vanhauser-thc
Date: Thu, 23 Feb 2023 15:26:41 +0100
Subject: nits

---
 .gitignore                        | 121 +++++++++++++++++++-------------------
 custom_mutators/autotokens/README |   7 ++-
 custom_mutators/autotokens/TODO   |   3 -
 3 files changed, 66 insertions(+), 65 deletions(-)
 delete mode 100644 custom_mutators/autotokens/TODO

(limited to 'custom_mutators/autotokens/README')

diff --git a/.gitignore b/.gitignore
index 45d8676c..c01750e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,104 +1,107 @@
 !coresight_mode
 !coresight_mode/coresight-trace
 *.dSYM
 *.o
 *.o.tmp
 *.pyc
 *.so
 *.swp
 .sync_tmp
 .test
 .test2
 .vscode
 afl-analyze
 afl-analyze.8
 afl-as
 afl-as.8
 afl-c++
 afl-c++.8
 afl-cc
 afl-cc.8
 afl-clang
 afl-clang++
 afl-clang-fast
 afl-clang-fast++
 afl-clang-fast++.8
 afl-clang-fast.8
 afl-clang-lto
 afl-clang-lto++
 afl-clang-lto++.8
 afl-clang-lto.8
 afl-cmin.8
 afl-cmin.bash.8
 afl-cs-proxy
 afl-frida-trace.so
 afl-fuzz
 afl-fuzz.8
 afl-g++
 afl-g++.8
 afl-gcc
 afl-gcc.8
 afl-gcc-fast
 afl-gcc-fast.8
 afl-g++-fast
 afl-g++-fast.8
 afl-gotcpu
 afl-gotcpu.8
 afl-ld
 afl-ld-lto
 afl-lto
 afl-lto++
 afl-lto++.8
 afl-lto.8
 afl-persistent-config.8
 afl-plot.8
 afl-qemu-trace
 afl-showmap
 afl-showmap.8
 afl-system-config.8
 afl-tmin
 afl-tmin.8
 afl-whatsup.8
 a.out
 as
 compile_commands.json
 core*
 examples/afl_frida/afl-frida
 examples/afl_frida/frida-gum-example.c
 examples/afl_frida/frida-gum.h
 examples/afl_frida/libtestinstr.so
 examples/afl_network_proxy/afl-network-client
 examples/afl_network_proxy/afl-network-server
 examples/aflpp_driver/libAFLDriver.a
 examples/aflpp_driver/libAFLQemuDriver.a
 gmon.out
 in
 ld
 libAFLDriver.a
 libAFLQemuDriver.a
 out
 qemu_mode/libcompcov/compcovtest
 qemu_mode/qemu-*
 qemu_mode/qemuafl
 test/.afl_performance
 test-instr
 test/output
 test/test-c
 test/test-cmplog
 test/test-compcov
 test/test-instr.ts
 test/test-persistent
 test/unittests/unit_hash
 test/unittests/unit_list
 test/unittests/unit_maybe_alloc
 test/unittests/unit_preallocable
 test/unittests/unit_rand
 unicorn_mode/samples/*/output/
 unicorn_mode/samples/*/\.test-*
 utils/afl_network_proxy/afl-network-client
 utils/afl_network_proxy/afl-network-server
 utils/afl_proxy/afl-proxy
 utils/optimin/build
 utils/optimin/optimin
 utils/persistent_mode/persistent_demo
 utils/persistent_mode/persistent_demo_new
 utils/persistent_mode/test-instr
 utils/plot_ui/afl-plot-ui
 vuln_prog
diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README
index 295cd736..cca168fd 100644
--- a/custom_mutators/autotokens/README
+++ b/custom_mutators/autotokens/README
@@ -1,8 +1,9 @@
-# autotokens
+# Autotokens
 
 This implements an improved autotoken grammar fuzzing idea presented in
 [Token-Level Fuzzing](https://www.usenix.org/system/files/sec21-salls.pdf).
-It is a grammar fuzzer without actually knowing the grammar.
+It is a grammar fuzzer without actually knowing the grammar, but only works
+with text-based inputs.
 
 It is recommended to run it together in an instance with `CMPLOG`.
 
@@ -19,7 +20,7 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator!
 `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be
                        removed. Default: `/* ... */`
 `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the amount of fuzzing performed, shifting
-                                the value by this number set, e.g. 1.
+                                the value by this number, e.g. 1.
 `AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii
                             (or no input and no (ascii) dictionary)
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO
deleted file mode 100644
index 2e99e147..00000000
--- a/custom_mutators/autotokens/TODO
+++ /dev/null
@@ -1,3 +0,0 @@
-env for the amount per mutation run
-
-change_min/_max values
--
cgit 1.4.1
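Taken together, the series leaves autotokens with a simple core data model:
every distinct token string is interned to a numeric id, each queue entry
maps to a structure (a sequence of ids), mutations operate on those id
sequences, and output is produced by concatenating the token strings back
together. In miniature (illustrative C++ sketch, not the repository code):

    #include <string>
    #include <unordered_map>
    #include <vector>

    static std::unordered_map<std::string, unsigned> token_to_id;
    static std::unordered_map<unsigned, std::string> id_to_token;

    // map a token string to a stable id, creating one if unseen
    static unsigned intern(const std::string &tok) {

      auto it = token_to_id.find(tok);
      if (it != token_to_id.end()) { return it->second; }
      unsigned id = (unsigned)token_to_id.size();
      token_to_id[tok] = id;
      id_to_token[id] = tok;
      return id;

    }

    // turn a (possibly mutated) structure back into fuzzer output
    static std::string detokenize(const std::vector<unsigned> &ids) {

      std::string out;
      for (unsigned id : ids) { out += id_to_token[id]; }
      return out;

    }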