diff options
Diffstat (limited to 'custom_mutators')
-rw-r--r-- | custom_mutators/autotokens/TODO | 15 | ||||
-rw-r--r-- | custom_mutators/autotokens/autotokens.cpp | 248 |
2 files changed, 197 insertions, 66 deletions
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 700b3fa7..2e5e384f 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,13 +1,12 @@ whitespace belassen oder notieren? MAYBE 0=space 1=tab 2=linefeed -dictionary mitverwenden? JA aber nur ascii --> neue liste? -wie mache ich das bei honggfuzz? -ansonsten neuer custom mutator entrypoint? +cmplog: only add tokens that were found to fit? + +create from thin air if no good seed after a cycle and dict large enough? +(static u32 no_of_struct_inputs;) + +splice insert, splice overwrite +(linefeed, semicolon) -nur is_ascii wenn cmplog aktiv, ansonsten eigene implementierung -die aber dann dafür sorgt dass eine leere struktur da ist. -is is_ascii in afl-common.o ? -cmplog: only add tokens that were found to fit? diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index afde8c26..2fad8dd7 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1,5 +1,7 @@ extern "C" { + #include "afl-fuzz.h" + } #include <stdio.h> @@ -13,9 +15,7 @@ extern "C" { #include <regex> #define AUTOTOKENS_DEBUG 1 -#define AUTOTOKENS_LEN_MIN 12 -#define AUTOTOKENS_CHANGE_MIN_PERCENT 5 -#define AUTOTOKENS_CHANGE_MAX_PERCENT 10 +#define AUTOTOKENS_CHANGE_MIN 8 using namespace std; @@ -31,43 +31,55 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static u32 current_id = 0; +static u32 valid_structures = 0; +static u32 extras_cnt = 0, a_extras_cnt = 0; static unordered_map<string, vector<u32> *> file_mapping; static unordered_map<string, u32> token_to_id; static unordered_map<u32, string> id_to_token; -static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); -static regex regex_comment_star("/\\*(.|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); -static vector<u32> *s; +static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*(.|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); +static vector<u32> *s; // the structure of the currently selected input -extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, - u8 **out_buf, uint8_t *add_buf, - size_t add_buf_size, size_t max_size) { +extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, + u8 **out_buf, u8 *add_buf, + size_t add_buf_size, size_t max_size) { - DEBUG(stderr, "MUT!\n"); + if (s == NULL) { + + *out_buf = NULL; + return 0; - if (s == NULL) { return 0; } + } - vector<u32> m = *s; - u32 i, m_size = (u32)m.size(); + vector<u32> m = *s; // copy of the structure we will modify + u32 i, m_size = (u32)m.size(); - u32 rounds = MAX(8, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); - DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + u32 rounds = + MAX(AUTOTOKENS_CHANGE_MIN, + MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * + afl_ptr->havoc_div / 256)); + // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); for (i = 0; i < rounds; ++i) { - + u32 item, new_item; - - switch(rand_below(afl_ptr, 4)) { + + switch (rand_below(afl_ptr, 4)) { + /* CHANGE */ - case 0: /* fall through */ + case 0: /* fall through */ case 1: item = rand_below(afl_ptr, m_size); do { + new_item = 1 + rand_below(afl_ptr, current_id); - } while(unlikely(new_item == m[item])); + + } while (unlikely(new_item == m[item])); + m[item] = new_item; break; /* INSERT (+1 so we insert also after last place) */ @@ -81,31 +93,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_s if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } --m_size; break; + // TODO: add full line insert splice, replace splace, delete + } - + } - + string output; - u32 m_size_1 = m_size - 1; + u32 m_size_1 = m_size - 1; + for (i = 0; i < m_size; ++i) { + output += id_to_token[m[i]]; if (likely(i < m_size_1)) { output += " "; } + } u32 mutated_size = output.size(); - u8 *mutated_out = (u8*)afl_realloc((void**)out_buf, mutated_size); + u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size); if (unlikely(!mutated_out)) { - + *out_buf = NULL; return 0; - + } - /* - *out_buf = buf; - return buf_size; - */ memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); @@ -113,29 +126,106 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_s } - /* We are not using afl_custom_queue_new_entry() because not every corpus entry will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { - if (likely(!debug)) - if (!afl_ptr->queue_cur->is_ascii) { s = NULL; return 0; } + if (likely(!debug)) { + + if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) { + + s = NULL; + return 0; + + } + + } + + // check if there are new dictionary entries and add them to the tokens + if (valid_structures) { + + while (extras_cnt < afl_ptr->extras_cnt) { + + u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; + u8 *ptr = afl_ptr->extras[extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + ++current_id; + token_to_id[(char *)ptr] = current_id; + id_to_token[current_id] = (char *)ptr; + + } + + ++extras_cnt; + DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr); + + } + + while (a_extras_cnt < afl_ptr->a_extras_cnt) { + + u32 ok = 1, l = afl_ptr->a_extras[a_extras_cnt].len; + u8 *ptr = afl_ptr->a_extras[a_extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + ++current_id; + token_to_id[(char *)ptr] = current_id; + id_to_token[current_id] = (char *)ptr; + + } + + ++a_extras_cnt; + DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr); + + } + + } vector<u32> *structure = NULL; string fn = (char *)filename; + auto entry = file_mapping.find(fn); - auto entry = file_mapping.find(fn); if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! FILE *fp = fopen((char *)filename, "rb"); - if (!fp) { s = NULL; return 0; } // should not happen + if (!fp) { + + s = NULL; + return 0; + + } // should not happen + fseek(fp, 0, SEEK_END); size_t len = (size_t)ftell(fp); - if (len < AUTOTOKENS_LEN_MIN) { + + if (len < AFL_TXT_MIN_LEN) { fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again @@ -151,6 +241,30 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fread(input.data(), input.size(), 1, fp); fclose(fp); + if (!afl_ptr->shm.cmplog_mode) { + + // not running with CMPLOG? bad choice, but whatever ... + // we only want text inputs, so we have to check it ourselves. + + u32 valid_chars = 0; + for (u32 i = 0; i < len; ++i) { + + if (isascii((int)input[i]) || isprint((int)input[i])) { ++valid_chars; } + + } + + // we want at least 95% of text characters ... + if (((len * AFL_TXT_MIN_PERCENT) / 100) > valid_chars) { + + file_mapping[fn] = NULL; + DEBUG(stderr, "Not text (%lu) %s\n", len, filename); + s = NULL; + return 0; + + } + + } + // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); @@ -175,7 +289,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator cur = input.begin(), ende = input.end(), last = cur, found, prev; - DEBUG(stderr, "MATCHES:\n"); while (regex_search(cur, ende, match, regex_string)) { prev = cur; @@ -196,11 +309,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), input.size()); - for (auto x : tokenized) { + if (unlikely(debug)) + for (auto x : tokenized) { - cerr << x << endl; + cerr << x << endl; - } + } for (auto token : tokenized) { @@ -232,8 +346,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (c < e) { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(c, e)); } @@ -248,8 +367,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (cur < ende) { - DEBUG(stderr, "REST!\n"); - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; vector<std::string> tokenized{it, {}}; tokenized.erase( @@ -260,11 +377,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), input.size()); - for (auto x : tokenized) { + if (unlikely(debug)) + for (auto x : tokenized) { - cerr << x << endl; + cerr << x << endl; - } + } for (auto token : tokenized) { @@ -279,8 +397,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (p < f) { // there are items between search start and find - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(p, f)); } @@ -296,8 +419,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (c < e) { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(c, e)); } @@ -306,15 +434,18 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - DEBUG(stderr, "DUMPING TOKENS:\n"); - if (unlikely(debug)) + if (unlikely(debug)) { + + DEBUG(stderr, "DUMPING TOKENS:\n"); for (u32 i = 0; i < tokens.size(); ++i) { DEBUG(stderr, "%s ", tokens[i].c_str()); } - DEBUG(stderr, "---------------------------\n"); + DEBUG(stderr, "---------------------------\n"); + + } /* Now we transform the tokens into an ID list and saved that */ @@ -342,6 +473,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // save the token structure to the file mapping file_mapping[fn] = structure; s = structure; + ++valid_structures; // we are done! DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", |