aboutsummaryrefslogtreecommitdiff
path: root/custom_mutators
diff options
context:
space:
mode:
authorvanhauser-thc <vh@thc.org>2023-01-15 13:47:31 +0100
committervanhauser-thc <vh@thc.org>2023-01-15 13:47:31 +0100
commit35801bed7a5feb8cc3a363bafbd577f256c467f6 (patch)
tree29a48f21bc678a605e15cdd4fd205ae7a42cba45 /custom_mutators
parent9548af52b266ecc2aed81f388f7a1a7a3fcfb181 (diff)
downloadafl++-35801bed7a5feb8cc3a363bafbd577f256c467f6.tar.gz
dictionary support
Diffstat (limited to 'custom_mutators')
-rw-r--r--custom_mutators/autotokens/TODO15
-rw-r--r--custom_mutators/autotokens/autotokens.cpp248
2 files changed, 197 insertions, 66 deletions
diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO
index 700b3fa7..2e5e384f 100644
--- a/custom_mutators/autotokens/TODO
+++ b/custom_mutators/autotokens/TODO
@@ -1,13 +1,12 @@
whitespace belassen oder notieren? MAYBE
0=space 1=tab 2=linefeed
-dictionary mitverwenden? JA aber nur ascii
--> neue liste?
-wie mache ich das bei honggfuzz?
-ansonsten neuer custom mutator entrypoint?
+cmplog: only add tokens that were found to fit?
+
+create from thin air if no good seed after a cycle and dict large enough?
+(static u32 no_of_struct_inputs;)
+
+splice insert, splice overwrite
+(linefeed, semicolon)
-nur is_ascii wenn cmplog aktiv, ansonsten eigene implementierung
-die aber dann dafür sorgt dass eine leere struktur da ist.
-is is_ascii in afl-common.o ?
-cmplog: only add tokens that were found to fit?
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index afde8c26..2fad8dd7 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -1,5 +1,7 @@
extern "C" {
+
#include "afl-fuzz.h"
+
}
#include <stdio.h>
@@ -13,9 +15,7 @@ extern "C" {
#include <regex>
#define AUTOTOKENS_DEBUG 1
-#define AUTOTOKENS_LEN_MIN 12
-#define AUTOTOKENS_CHANGE_MIN_PERCENT 5
-#define AUTOTOKENS_CHANGE_MAX_PERCENT 10
+#define AUTOTOKENS_CHANGE_MIN 8
using namespace std;
@@ -31,43 +31,55 @@ typedef struct my_mutator {
static afl_state *afl_ptr;
static int debug = AUTOTOKENS_DEBUG;
static u32 current_id = 0;
+static u32 valid_structures = 0;
+static u32 extras_cnt = 0, a_extras_cnt = 0;
static unordered_map<string, vector<u32> *> file_mapping;
static unordered_map<string, u32> token_to_id;
static unordered_map<u32, string> id_to_token;
-static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize);
-static regex regex_comment_star("/\\*(.|\n)*?\\*/",
- regex::multiline | regex::optimize);
-static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize);
-static regex regex_word("[A-Za-z0-9_$]+", regex::optimize);
-static regex regex_whitespace(R"([ \t]+)", regex::optimize);
-static vector<u32> *s;
+static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize);
+static regex regex_comment_star("/\\*(.|\n)*?\\*/",
+ regex::multiline | regex::optimize);
+static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize);
+static regex regex_word("[A-Za-z0-9_$]+", regex::optimize);
+static regex regex_whitespace(R"([ \t]+)", regex::optimize);
+static vector<u32> *s; // the structure of the currently selected input
-extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size,
- u8 **out_buf, uint8_t *add_buf,
- size_t add_buf_size, size_t max_size) {
+extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
+ u8 **out_buf, u8 *add_buf,
+ size_t add_buf_size, size_t max_size) {
- DEBUG(stderr, "MUT!\n");
+ if (s == NULL) {
+
+ *out_buf = NULL;
+ return 0;
- if (s == NULL) { return 0; }
+ }
- vector<u32> m = *s;
- u32 i, m_size = (u32)m.size();
+ vector<u32> m = *s; // copy of the structure we will modify
+ u32 i, m_size = (u32)m.size();
- u32 rounds = MAX(8, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256));
- DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
+ u32 rounds =
+ MAX(AUTOTOKENS_CHANGE_MIN,
+ MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score *
+ afl_ptr->havoc_div / 256));
+ // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
for (i = 0; i < rounds; ++i) {
-
+
u32 item, new_item;
-
- switch(rand_below(afl_ptr, 4)) {
+
+ switch (rand_below(afl_ptr, 4)) {
+
/* CHANGE */
- case 0: /* fall through */
+ case 0: /* fall through */
case 1:
item = rand_below(afl_ptr, m_size);
do {
+
new_item = 1 + rand_below(afl_ptr, current_id);
- } while(unlikely(new_item == m[item]));
+
+ } while (unlikely(new_item == m[item]));
+
m[item] = new_item;
break;
/* INSERT (+1 so we insert also after last place) */
@@ -81,31 +93,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_s
if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); }
--m_size;
break;
+ // TODO: add full line insert splice, replace splace, delete
+
}
-
+
}
-
+
string output;
- u32 m_size_1 = m_size - 1;
+ u32 m_size_1 = m_size - 1;
+
for (i = 0; i < m_size; ++i) {
+
output += id_to_token[m[i]];
if (likely(i < m_size_1)) { output += " "; }
+
}
u32 mutated_size = output.size();
- u8 *mutated_out = (u8*)afl_realloc((void**)out_buf, mutated_size);
+ u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size);
if (unlikely(!mutated_out)) {
-
+
*out_buf = NULL;
return 0;
-
+
}
- /*
- *out_buf = buf;
- return buf_size;
- */
memcpy(mutated_out, output.data(), mutated_size);
*out_buf = mutated_out;
DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out);
@@ -113,29 +126,106 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_s
}
-
/* We are not using afl_custom_queue_new_entry() because not every corpus entry
will be necessarily fuzzed. so we use afl_custom_queue_get() instead */
extern "C" unsigned char afl_custom_queue_get(void *data,
const unsigned char *filename) {
- if (likely(!debug))
- if (!afl_ptr->queue_cur->is_ascii) { s = NULL; return 0; }
+ if (likely(!debug)) {
+
+ if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) {
+
+ s = NULL;
+ return 0;
+
+ }
+
+ }
+
+ // check if there are new dictionary entries and add them to the tokens
+ if (valid_structures) {
+
+ while (extras_cnt < afl_ptr->extras_cnt) {
+
+ u32 ok = 1, l = afl_ptr->extras[extras_cnt].len;
+ u8 *ptr = afl_ptr->extras[extras_cnt].data;
+
+ for (u32 i = 0; i < l; ++i) {
+
+ if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) {
+
+ ok = 0;
+ break;
+
+ }
+
+ }
+
+ if (ok) {
+
+ ++current_id;
+ token_to_id[(char *)ptr] = current_id;
+ id_to_token[current_id] = (char *)ptr;
+
+ }
+
+ ++extras_cnt;
+ DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr);
+
+ }
+
+ while (a_extras_cnt < afl_ptr->a_extras_cnt) {
+
+ u32 ok = 1, l = afl_ptr->a_extras[a_extras_cnt].len;
+ u8 *ptr = afl_ptr->a_extras[a_extras_cnt].data;
+
+ for (u32 i = 0; i < l; ++i) {
+
+ if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) {
+
+ ok = 0;
+ break;
+
+ }
+
+ }
+
+ if (ok) {
+
+ ++current_id;
+ token_to_id[(char *)ptr] = current_id;
+ id_to_token[current_id] = (char *)ptr;
+
+ }
+
+ ++a_extras_cnt;
+ DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr);
+
+ }
+
+ }
vector<u32> *structure = NULL;
string fn = (char *)filename;
+ auto entry = file_mapping.find(fn);
- auto entry = file_mapping.find(fn);
if (entry == file_mapping.end()) {
// this input file was not analyzed for tokens yet, so let's do it!
FILE *fp = fopen((char *)filename, "rb");
- if (!fp) { s = NULL; return 0; } // should not happen
+ if (!fp) {
+
+ s = NULL;
+ return 0;
+
+ } // should not happen
+
fseek(fp, 0, SEEK_END);
size_t len = (size_t)ftell(fp);
- if (len < AUTOTOKENS_LEN_MIN) {
+
+ if (len < AFL_TXT_MIN_LEN) {
fclose(fp);
file_mapping[fn] = structure; // NULL ptr so we don't read the file again
@@ -151,6 +241,30 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
fread(input.data(), input.size(), 1, fp);
fclose(fp);
+ if (!afl_ptr->shm.cmplog_mode) {
+
+ // not running with CMPLOG? bad choice, but whatever ...
+ // we only want text inputs, so we have to check it ourselves.
+
+ u32 valid_chars = 0;
+ for (u32 i = 0; i < len; ++i) {
+
+ if (isascii((int)input[i]) || isprint((int)input[i])) { ++valid_chars; }
+
+ }
+
+ // we want at least 95% of text characters ...
+ if (((len * AFL_TXT_MIN_PERCENT) / 100) > valid_chars) {
+
+ file_mapping[fn] = NULL;
+ DEBUG(stderr, "Not text (%lu) %s\n", len, filename);
+ s = NULL;
+ return 0;
+
+ }
+
+ }
+
// DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n",
// input.size(), filename, input.c_str());
@@ -175,7 +289,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
string::const_iterator cur = input.begin(), ende = input.end(), last = cur,
found, prev;
- DEBUG(stderr, "MATCHES:\n");
while (regex_search(cur, ende, match, regex_string)) {
prev = cur;
@@ -196,11 +309,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(),
input.size());
- for (auto x : tokenized) {
+ if (unlikely(debug))
+ for (auto x : tokenized) {
- cerr << x << endl;
+ cerr << x << endl;
- }
+ }
for (auto token : tokenized) {
@@ -232,8 +346,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
if (c < e) {
- string foo(c, e);
- DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+ if (unlikely(debug)) {
+
+ string foo(c, e);
+ DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+
+ }
+
tokens.push_back(std::string(c, e));
}
@@ -248,8 +367,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
if (cur < ende) {
- DEBUG(stderr, "REST!\n");
-
sregex_token_iterator it{cur, ende, regex_whitespace, -1};
vector<std::string> tokenized{it, {}};
tokenized.erase(
@@ -260,11 +377,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(),
input.size());
- for (auto x : tokenized) {
+ if (unlikely(debug))
+ for (auto x : tokenized) {
- cerr << x << endl;
+ cerr << x << endl;
- }
+ }
for (auto token : tokenized) {
@@ -279,8 +397,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
if (p < f) {
// there are items between search start and find
- string foo(p, f);
- DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+ if (unlikely(debug)) {
+
+ string foo(p, f);
+ DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+
+ }
+
tokens.push_back(std::string(p, f));
}
@@ -296,8 +419,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
if (c < e) {
- string foo(c, e);
- DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+ if (unlikely(debug)) {
+
+ string foo(c, e);
+ DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+
+ }
+
tokens.push_back(std::string(c, e));
}
@@ -306,15 +434,18 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
}
- DEBUG(stderr, "DUMPING TOKENS:\n");
- if (unlikely(debug))
+ if (unlikely(debug)) {
+
+ DEBUG(stderr, "DUMPING TOKENS:\n");
for (u32 i = 0; i < tokens.size(); ++i) {
DEBUG(stderr, "%s ", tokens[i].c_str());
}
- DEBUG(stderr, "---------------------------\n");
+ DEBUG(stderr, "---------------------------\n");
+
+ }
/* Now we transform the tokens into an ID list and saved that */
@@ -342,6 +473,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
// save the token structure to the file mapping
file_mapping[fn] = structure;
s = structure;
+ ++valid_structures;
// we are done!
DEBUG(stderr, "DONE! We have %lu tokens in the structure\n",