author     vanhauser-thc <vh@thc.org>  2023-02-13 08:14:04 +0100
committer  vanhauser-thc <vh@thc.org>  2023-02-13 08:14:04 +0100
commit     7eaef449a1e92999c89df23ab474b3be3da595f8 (patch)
tree       b949164a8262c8229452f72101e7239dcdca9925
parent     ca063c92d20f4dee6ae9fd1d48dc531768b14ca5 (diff)
download   afl++-7eaef449a1e92999c89df23ab474b3be3da595f8.tar.gz
remove ALTERNATIVE_TOKENIZE
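
This drops the experimental alternative tokenizer wholesale: the
AUTOTOKENS_ALTERNATIVE_TOKENIZE compile-time define, the alternative_tokenize
runtime flag and its environment variable, and the regex-based tokenizing path
in afl_custom_queue_get(). The hand-written character-class tokenizer is the
only code path left, so the previously guarded blocks are de-indented and the
whitespace re-joining that only the alternative mode needed is removed.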
-rw-r--r--  custom_mutators/autotokens/autotokens.cpp | 522
1 file changed, 136 insertions(+), 386 deletions(-)
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index ee35c68b..a027ac2b 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -22,7 +22,6 @@ extern "C" {
#define AUTOTOKENS_DEBUG 0
#define AUTOTOKENS_ONLY_FAV 0
-#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0
#define AUTOTOKENS_CHANGE_MIN 8
#define AUTOTOKENS_CHANGE_MAX 64
#define AUTOTOKENS_WHITESPACE " "
@@ -60,7 +59,6 @@ typedef struct my_mutator {
static afl_state *afl_ptr;
static int debug = AUTOTOKENS_DEBUG;
static int only_fav = AUTOTOKENS_ONLY_FAV;
-static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE;
static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT;
static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT;
static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR;
@@ -142,7 +140,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
(void)(data);
- if (s == NULL) {
+ if (unlikely(s == NULL)) {
*out_buf = NULL;
return 0;
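
The NULL check gains an unlikely() hint so the compiler lays out the early
exit as the cold path. For reference — a sketch, assuming the usual AFL++
macro definitions — these hints expand to GCC/Clang builtins:

    /* sketch; the real macros live in AFL++'s headers */
    #define likely(_x)   __builtin_expect(!!(_x), 1)
    #define unlikely(_x) __builtin_expect(!!(_x), 0)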
@@ -183,9 +181,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
} while (unlikely(
new_item == cur_item ||
- (!alternative_tokenize &&
- ((whitespace_ids < new_item && whitespace_ids >= cur_item) ||
- (whitespace_ids >= new_item && whitespace_ids < cur_item)))));
+ ((whitespace_ids < new_item && whitespace_ids >= cur_item) ||
+ (whitespace_ids >= new_item && whitespace_ids < cur_item))));
DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item);
m[pos] = new_item;
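
The retry loop above keeps a substitution on its own side of the whitespace
boundary: ids below whitespace_ids name whitespace tokens, higher ids name
content tokens, and replacing one class with the other would silently add or
remove separation. Roughly, and up to the boundary handling at
id == whitespace_ids, the accepted swaps are (sketch, helper name
hypothetical):

    // a swap is kept only when old and new id fall in the same class
    static bool same_token_class(u32 a, u32 b) {
      return (a < whitespace_ids) == (b < whitespace_ids);
    }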
@@ -200,37 +197,33 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
new_item = rand_below(afl_ptr, current_id);
- } while (unlikely(!alternative_tokenize && new_item >= whitespace_ids));
+ } while (unlikely(new_item >= whitespace_ids));
u32 pos = rand_below(afl_ptr, m_size + 1);
m.insert(m.begin() + pos, new_item);
++m_size;
DEBUGF(stderr, "INS: %u at %u\n", new_item, pos);
- if (likely(!alternative_tokenize)) {
+ // if we insert an identifier or string we might need whitespace
+ if (id_to_token[new_item].size() > 1) {
- // if we insert an identifier or string we might need whitespace
- if (id_to_token[new_item].size() > 1) {
+ // need to insert before?
- // need to insert before?
+ if (pos && m[pos - 1] >= whitespace_ids &&
+ id_to_token[m[pos - 1]].size() > 1) {
- if (pos && m[pos - 1] >= whitespace_ids &&
- id_to_token[m[pos - 1]].size() > 1) {
-
- m.insert(m.begin() + pos, good_whitespace_or_singleval());
- ++m_size;
-
- }
+ m.insert(m.begin() + pos, good_whitespace_or_singleval());
+ ++m_size;
- if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids &&
- id_to_token[m[pos + 1]].size() > 1) {
+ }
- // need to insert after?
+ if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids &&
+ id_to_token[m[pos + 1]].size() > 1) {
- m.insert(m.begin() + pos + 1, good_whitespace_or_singleval());
- ++m_size;
+ // need to insert after?
- }
+ m.insert(m.begin() + pos + 1, good_whitespace_or_singleval());
+ ++m_size;
}
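
Since whitespace is explicit in the token stream, inserting a token that
renders to more than one character next to a neighbour that is also
multi-character and not whitespace would fuse the two at serialization time
("foo" next to "bar" becomes "foobar"). The checks above insert a separator
from good_whitespace_or_singleval() on either side when needed; the same test
guards the splice and delete mutations in the hunks below. A sketch of the
neighbour test (helper name hypothetical):

    // a neighbour forces a separator when it is not a whitespace token
    // (id >= whitespace_ids) and renders to more than one character
    static bool needs_separator(u32 neighbour_id) {
      return neighbour_id >= whitespace_ids &&
             id_to_token[neighbour_id].size() > 1;
    }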
@@ -290,26 +283,22 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
}
- if (likely(!alternative_tokenize)) {
-
- // do we need a whitespace/token at the beginning?
- if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 &&
- id_to_token[m[dst_off]].size() > 1) {
+ // do we need a whitespace/token at the beginning?
+ if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 &&
+ id_to_token[m[dst_off]].size() > 1) {
- m.insert(m.begin() + dst_off, good_whitespace_or_singleval());
- ++m_size;
+ m.insert(m.begin() + dst_off, good_whitespace_or_singleval());
+ ++m_size;
- }
+ }
- // do we need a whitespace/token at the end?
- if (dst_off + n < m_size &&
- id_to_token[m[dst_off + n - 1]].size() > 1 &&
- id_to_token[m[dst_off + n]].size() > 1) {
+ // do we need a whitespace/token at the end?
+ if (dst_off + n < m_size &&
+ id_to_token[m[dst_off + n - 1]].size() > 1 &&
+ id_to_token[m[dst_off + n]].size() > 1) {
- m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval());
- ++m_size;
-
- }
+ m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval());
+ ++m_size;
}
@@ -332,8 +321,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
// if what we delete will result in a missing whitespace/token,
// instead of deleting we switch the item to a whitespace or token.
- if (likely(!alternative_tokenize) && pos && pos + 1 < m_size &&
- id_to_token[m[pos - 1]].size() > 1 &&
+ if (pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 &&
id_to_token[m[pos + 1]].size() > 1) {
m[pos] = good_whitespace_or_singleval();
@@ -362,17 +350,11 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
}
- u32 m_size_1 = m_size - 1;
output = "";
for (i = 0; i < m_size; ++i) {
output += id_to_token[m[i]];
- if (unlikely(alternative_tokenize && i < m_size_1)) {
-
- output += whitespace;
-
- }
}
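
Output generation is now plain concatenation; the removed branch joined
consecutive tokens with a canonical whitespace string, which only the
alternative mode required. The default tokenizer makes this safe because it
preserves every input byte in some token — a property one can state as
(sketch; tokenize() per the rules illustrated after the tokenizer hunk below):

    std::string out;
    for (auto &t : tokenize(input)) out += t;  // plain concatenation
    assert(out == input);                      // round-trips byte for byte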
@@ -725,109 +707,57 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
DEBUGF(stderr, "START!\n");
- if (likely(!alternative_tokenize)) {
+ while (my_search_string(cur, ende, &match_begin, &match_end)) {
- while (my_search_string(cur, ende, &match_begin, &match_end)) {
+ prev = cur;
+ found = match_begin;
+ cur = match_end;
- prev = cur;
- found = match_begin;
- cur = match_end;
+ IFDEBUG {
- IFDEBUG {
-
- string foo(match_begin, match_end);
- DEBUGF(stderr,
- "string %s found at start %lu offset %lu continue at %lu\n",
- foo.c_str(), prev - input.begin(), found - prev,
- cur - input.begin());
-
- }
-
- if (prev < found) { // there are items between search start and find
- while (prev < found) {
-
- if (isspace(*prev)) {
-
- auto start = prev;
- while (isspace(*prev)) {
-
- ++prev;
-
- }
-
- tokens.push_back(std::string(start, prev));
- DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start,
- tokens[tokens.size() - 1].c_str());
-
- } else if (isalnum(*prev) || *prev == '$' || *prev == '_') {
-
- auto start = prev;
- while (isalnum(*prev) || *prev == '$' || *prev == '_' ||
- *prev == '.' || *prev == '/') {
-
- ++prev;
-
- }
-
- tokens.push_back(string(start, prev));
- DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start,
- tokens[tokens.size() - 1].c_str());
-
- } else {
-
- tokens.push_back(string(prev, prev + 1));
- DEBUGF(stderr, "OTHER \"%c\"\n", *prev);
- ++prev;
-
- }
-
- }
-
- }
-
- tokens.push_back(string(match_begin, match_end));
- DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str());
+ string foo(match_begin, match_end);
+ DEBUGF(stderr,
+ "string %s found at start %lu offset %lu continue at %lu\n",
+ foo.c_str(), prev - input.begin(), found - prev,
+ cur - input.begin());
}
- DEBUGF(stderr, "AFTER all strings\n");
-
- if (cur < ende) {
+ if (prev < found) { // there are items between search start and find
+ while (prev < found) {
- while (cur < ende) {
+ if (isspace(*prev)) {
- if (isspace(*cur)) {
+ auto start = prev;
+ while (isspace(*prev)) {
- auto start = cur;
- while (isspace(*cur)) {
-
- ++cur;
+ ++prev;
}
- tokens.push_back(std::string(start, cur));
- DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start,
+ tokens.push_back(std::string(start, prev));
+ DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start,
tokens[tokens.size() - 1].c_str());
- } else if (isalnum(*cur) || *cur == '$' || *cur == '_') {
+ } else if (isalnum(*prev) || *prev == '$' || *prev == '_') {
- auto start = cur;
- while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' ||
- *cur == '/') {
+ auto start = prev;
+ while (isalnum(*prev) || *prev == '$' || *prev == '_' ||
+ *prev == '.' || *prev == '/') {
- ++cur;
+ ++prev;
}
- tokens.push_back(std::string(start, cur));
- DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start,
+ tokens.push_back(string(start, prev));
+ DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start,
tokens[tokens.size() - 1].c_str());
} else {
- tokens.push_back(std::string(cur, cur + 1));
- DEBUGF(stderr, "OTHER \"%c\"\n", *cur);
- ++cur;
+ tokens.push_back(string(prev, prev + 1));
+ DEBUGF(stderr, "OTHER \"%c\"\n", *prev);
+ ++prev;
}
@@ -835,226 +765,49 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
}
- } else {
-
- // alternative tokenize
- while (my_search_string(cur, ende, &match_begin, &match_end)) {
-
- prev = cur;
- found = match_begin;
- cur = match_end;
- IFDEBUG {
-
- string foo(match_begin, match_end);
- DEBUGF(stderr,
- "string %s found at start %lu offset %lu continue at %lu\n",
- foo.c_str(), prev - input.begin(), found - prev,
- cur - input.begin());
-
- }
-
- if (prev < found) { // there are items between search start and find
-
- sregex_token_iterator it{prev, found, regex_whitespace, -1};
- vector<std::string> tokenized{it, {}};
- tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(),
- [](std::string const &s) {
-
- return s.size() == 0;
-
- }),
-
- tokenized.end());
- tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
-
- IFDEBUG {
-
- DEBUGF(stderr, "tokens1: %lu input size: %lu\n", tokenized.size(),
- input.size());
- for (auto x : tokenized) {
-
- cerr << x << endl;
-
- }
-
- }
-
- for (auto token : tokenized) {
-
- string::const_iterator c = token.begin(), e = token.end(), f, p;
- smatch m;
-
- while (regex_search(c, e, m, regex_word)) {
-
- p = c;
- f = m[0].first;
- c = m[0].second;
- if (p < f) {
-
- // there are items between search start and find
- while (p < f) {
-
- IFDEBUG {
+ tokens.push_back(string(match_begin, match_end));
+ DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str());
- string foo(p, p + 1);
- DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str());
-
- }
-
- tokens.push_back(std::string(p, p + 1));
- ++p;
-
- }
-
- IFDEBUG {
-
- string foo(p, f);
- DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str());
- tokens.push_back(std::string(p, f));
-
- }
-
- }
-
- DEBUGF(stderr,
- "SUBstring \"%s\" found at start %lu offset %lu continue "
- "at %lu\n",
- m[0].str().c_str(), p - input.begin(), m.position(),
- c - token.begin());
- tokens.push_back(m[0].str());
-
- }
-
- if (c < e) {
-
- while (c < e) {
-
- IFDEBUG {
-
- string foo(c, c + 1);
- DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str());
-
- }
-
- tokens.push_back(std::string(c, c + 1));
- ++c;
-
- }
-
- IFDEBUG {
-
- string foo(c, e);
- DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str());
-
- }
-
- tokens.push_back(std::string(c, e));
-
- }
-
- }
-
- }
-
- tokens.push_back(string(match_begin, match_end));
+ }
- }
+ DEBUGF(stderr, "AFTER all strings\n");
- if (cur < ende) {
+ if (cur < ende) {
- sregex_token_iterator it{cur, ende, regex_whitespace, -1};
- vector<std::string> tokenized{it, {}};
- tokenized.erase(
- std::remove_if(tokenized.begin(), tokenized.end(),
- [](std::string const &s) { return s.size() == 0; }),
- tokenized.end());
- tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
+ while (cur < ende) {
- IFDEBUG {
+ if (isspace(*cur)) {
- DEBUGF(stderr, "tokens2: %lu input size: %lu\n", tokenized.size(),
- input.size());
- for (auto x : tokenized) {
+ auto start = cur;
+ while (isspace(*cur)) {
- cerr << x << endl;
+ ++cur;
}
- }
-
- for (auto token : tokenized) {
-
- string::const_iterator c = token.begin(), e = token.end(), f, p;
- smatch m;
-
- while (regex_search(c, e, m, regex_word)) {
-
- p = c;
- f = m[0].first;
- c = m[0].second;
- if (p < f) {
+ tokens.push_back(std::string(start, cur));
+ DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start,
+ tokens[tokens.size() - 1].c_str());
- // there are items between search start and find
- while (p < f) {
+ } else if (isalnum(*cur) || *cur == '$' || *cur == '_') {
- IFDEBUG {
+ auto start = cur;
+ while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' ||
+ *cur == '/') {
- string foo(p, p + 1);
- DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str());
-
- }
-
- tokens.push_back(std::string(p, p + 1));
- ++p;
-
- }
-
- IFDEBUG {
-
- string foo(p, f);
- DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str());
-
- }
-
- tokens.push_back(std::string(p, f));
-
- }
-
- DEBUGF(stderr,
- "SUB2string \"%s\" found at start %lu offset %lu continue "
- "at %lu\n",
- m[0].str().c_str(), p - input.begin(), m.position(),
- c - token.begin());
- tokens.push_back(m[0].str());
+ ++cur;
}
- if (c < e) {
-
- while (c < e) {
-
- IFDEBUG {
-
- string foo(c, c + 1);
- DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str());
-
- }
-
- tokens.push_back(std::string(c, c + 1));
- ++c;
-
- }
+ tokens.push_back(std::string(start, cur));
+ DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start,
+ tokens[tokens.size() - 1].c_str());
- IFDEBUG {
-
- string foo(c, e);
- DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str());
-
- }
+ } else {
- tokens.push_back(std::string(c, e));
-
- }
+ tokens.push_back(std::string(cur, cur + 1));
+ DEBUGF(stderr, "OTHER \"%c\"\n", *cur);
+ ++cur;
}
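
De-indented, the surviving tokenizer is a three-class scanner: runs of
whitespace, runs of identifier characters (alphanumerics plus '$' and '_',
and, once a run has started, '.' and '/'), and every other byte as a
single-character token; it is applied between and after the matches found by
my_search_string(). A self-contained sketch of the character-class rules
(bounds checks and unsigned-char casts added for standalone correctness):

    #include <cctype>
    #include <string>
    #include <vector>

    static std::vector<std::string> tokenize(const std::string &in) {

      std::vector<std::string> tokens;
      auto cur = in.begin(), ende = in.end();

      while (cur < ende) {

        if (isspace((unsigned char)*cur)) {

          auto start = cur;                     // WHITESPACE run
          while (cur < ende && isspace((unsigned char)*cur)) ++cur;
          tokens.emplace_back(start, cur);

        } else if (isalnum((unsigned char)*cur) || *cur == '$' ||
                   *cur == '_') {

          auto start = cur;                     // IDENTIFIER run
          while (cur < ende &&
                 (isalnum((unsigned char)*cur) || *cur == '$' ||
                  *cur == '_' || *cur == '.' || *cur == '/'))
            ++cur;
          tokens.emplace_back(start, cur);

        } else {

          tokens.emplace_back(cur, cur + 1);    // OTHER: one byte
          ++cur;

        }

      }

      return tokens;

    }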
@@ -1065,15 +818,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
IFDEBUG {
DEBUGF(stderr, "DUMPING TOKENS:\n");
- u32 size_1 = tokens.size() - 1;
for (u32 i = 0; i < tokens.size(); ++i) {
DEBUGF(stderr, "%s", tokens[i].c_str());
- if (unlikely(alternative_tokenize && i < size_1)) {
-
- DEBUGF(stderr, "%s", whitespace.c_str());
-
- }
}
@@ -1157,7 +904,6 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; }
if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; }
if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; }
- if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; }
if (getenv("AUTOTOKENS_LEARN_DICT")) {
@@ -1180,14 +926,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
if (getenv("AUTOTOKENS_CHANGE_MIN")) {
change_min = atoi(getenv("AUTOTOKENS_CHANGE_MIN"));
- if (change_min < 1 || change_min > 256) { change_min = AUTOTOKENS_CHANGE_MIN; }
+ if (change_min < 1 || change_min > 256) {
+
+ change_min = AUTOTOKENS_CHANGE_MIN;
+
+ }
}
if (getenv("AUTOTOKENS_CHANGE_MAX")) {
change_max = atoi(getenv("AUTOTOKENS_CHANGE_MAX"));
- if (change_max < 1 || change_max > 4096) { change_max = AUTOTOKENS_CHANGE_MAX; }
+ if (change_max < 1 || change_max > 4096) {
+
+ change_max = AUTOTOKENS_CHANGE_MAX;
+
+ }
}
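
AUTOTOKENS_CHANGE_MIN and AUTOTOKENS_CHANGE_MAX are read from the environment
and reset to the defaults (8 and 64) when outside [1, 256] and [1, 4096]
respectively. The repeated pattern could be factored into a helper along
these lines (sketch, name hypothetical):

    #include <cstdlib>

    static int env_int_or(const char *name, int lo, int hi, int dflt) {

      const char *v = getenv(name);
      if (!v) { return dflt; }
      int x = atoi(v);
      return (x < lo || x > hi) ? dflt : x;

    }

    // change_min = env_int_or("AUTOTOKENS_CHANGE_MIN", 1, 256,
    //                         AUTOTOKENS_CHANGE_MIN);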
@@ -1212,53 +966,49 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
// set common whitespace tokens
// we deliberately do not put uncommon ones here to these will count as
// identifier tokens.
- if (!alternative_tokenize) {
-
- token_to_id[" "] = current_id;
- id_to_token[current_id] = " ";
- ++current_id;
- token_to_id["\t"] = current_id;
- id_to_token[current_id] = "\t";
- ++current_id;
- token_to_id["\n"] = current_id;
- id_to_token[current_id] = "\n";
- ++current_id;
- token_to_id["\r\n"] = current_id;
- id_to_token[current_id] = "\r\n";
- ++current_id;
- token_to_id[" \n"] = current_id;
- id_to_token[current_id] = " \n";
- ++current_id;
- token_to_id[" "] = current_id;
- id_to_token[current_id] = " ";
- ++current_id;
- token_to_id["\t\t"] = current_id;
- id_to_token[current_id] = "\t\t";
- ++current_id;
- token_to_id["\n\n"] = current_id;
- id_to_token[current_id] = "\n\n";
- ++current_id;
- token_to_id["\r\n\r\n"] = current_id;
- id_to_token[current_id] = "\r\n\r\n";
- ++current_id;
- token_to_id[" "] = current_id;
- id_to_token[current_id] = " ";
- ++current_id;
- token_to_id["\t\t\t\t"] = current_id;
- id_to_token[current_id] = "\t\t\t\t";
- ++current_id;
- token_to_id["\n\n\n\n"] = current_id;
- id_to_token[current_id] = "\n\n\n\n";
- ++current_id;
- whitespace_ids = current_id;
- token_to_id["\""] = current_id;
- id_to_token[current_id] = "\"";
- ++current_id;
- token_to_id["'"] = current_id;
- id_to_token[current_id] = "'";
- ++current_id;
-
- }
+ token_to_id[" "] = current_id;
+ id_to_token[current_id] = " ";
+ ++current_id;
+ token_to_id["\t"] = current_id;
+ id_to_token[current_id] = "\t";
+ ++current_id;
+ token_to_id["\n"] = current_id;
+ id_to_token[current_id] = "\n";
+ ++current_id;
+ token_to_id["\r\n"] = current_id;
+ id_to_token[current_id] = "\r\n";
+ ++current_id;
+ token_to_id[" \n"] = current_id;
+ id_to_token[current_id] = " \n";
+ ++current_id;
+ token_to_id[" "] = current_id;
+ id_to_token[current_id] = " ";
+ ++current_id;
+ token_to_id["\t\t"] = current_id;
+ id_to_token[current_id] = "\t\t";
+ ++current_id;
+ token_to_id["\n\n"] = current_id;
+ id_to_token[current_id] = "\n\n";
+ ++current_id;
+ token_to_id["\r\n\r\n"] = current_id;
+ id_to_token[current_id] = "\r\n\r\n";
+ ++current_id;
+ token_to_id[" "] = current_id;
+ id_to_token[current_id] = " ";
+ ++current_id;
+ token_to_id["\t\t\t\t"] = current_id;
+ id_to_token[current_id] = "\t\t\t\t";
+ ++current_id;
+ token_to_id["\n\n\n\n"] = current_id;
+ id_to_token[current_id] = "\n\n\n\n";
+ ++current_id;
+ whitespace_ids = current_id;
+ token_to_id["\""] = current_id;
+ id_to_token[current_id] = "\"";
+ ++current_id;
+ token_to_id["'"] = current_id;
+ id_to_token[current_id] = "'";
+ ++current_id;
  return data;
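
Registering the common whitespace strings first guarantees that every
whitespace token receives an id below whitespace_ids, which is exactly what
the class checks during mutation rely on; the quote tokens sit just above the
boundary and tokens learned from queue entries follow. The resulting id
layout (illustrative):

    //   [0 .. whitespace_ids)      whitespace tokens (" ", "\t", "\n", ...)
    //   whitespace_ids, +1         "\"" and "'"
    //   [whitespace_ids + 2 .. )   tokens learned from the corpus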