author    vanhauser-thc <vh@thc.org>  2023-01-13 18:27:22 +0100
committer vanhauser-thc <vh@thc.org>  2023-01-13 18:27:22 +0100
commit    e3dadbfe0f9fad435a6fa201131315500f1a348a (patch)
tree      dc2dbfb06e9b4fb881514372fd65da7085b855d8
parent    75fb918a37ad29b93bd6c220460404dae156fa8c (diff)
download  afl++-e3dadbfe0f9fad435a6fa201131315500f1a348a.tar.gz
autotokens
-rw-r--r--  custom_mutators/autotokens/Makefile        |   7
-rw-r--r--  custom_mutators/autotokens/autotokens.cpp  | 391
m---------  qemu_mode/qemuafl                          |   0
3 files changed, 398 insertions(+), 0 deletions(-)
diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile
new file mode 100644
index 00000000..1ee7f5c4
--- /dev/null
+++ b/custom_mutators/autotokens/Makefile
@@ -0,0 +1,7 @@
+all: autotokens.so
+
+autotokens.so: autotokens.cpp
+	$(CXX) -O3 -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o
+
+clean:
+	rm -f autotokens.so *~ core
\ No newline at end of file
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
new file mode 100644
index 00000000..afde8c26
--- /dev/null
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -0,0 +1,391 @@
+extern "C" {
+#include "afl-fuzz.h"
+}
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <iostream>
+#include <fstream>
+#include <unordered_map>
+#include <vector>
+#include <regex>
+
+#define AUTOTOKENS_DEBUG 1
+#define AUTOTOKENS_LEN_MIN 12
+#define AUTOTOKENS_CHANGE_MIN_PERCENT 5
+#define AUTOTOKENS_CHANGE_MAX_PERCENT 10
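+/* AUTOTOKENS_LEN_MIN is the smallest input (in bytes) worth tokenizing; the
+   AUTOTOKENS_CHANGE_*_PERCENT knobs are placeholders that are not referenced
+   yet in this version. */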
+
+using namespace std;
+
+typedef struct my_mutator {
+
+ afl_state *afl;
+
+} my_mutator_t;
+
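+/* Logging helper: DEBUG(stderr, ...) expands to an fprintf() that runs only
+   when the global debug flag is set. */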
+#define DEBUG \
+ if (unlikely(debug)) fprintf
+
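+/* Global token state: token_to_id/id_to_token map token strings to numeric
+   IDs (the first ID is 1; 0 means "not seen yet"), file_mapping caches the
+   tokenized structure per queue file, and `s` points to the structure of the
+   entry selected by afl_custom_queue_get() (NULL = skip this entry). The
+   regexes strip // and C-style comments and split the remaining input into
+   string literals, words and whitespace. */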
+static afl_state *afl_ptr;
+static int debug = AUTOTOKENS_DEBUG;
+static u32 current_id = 0;
+static unordered_map<string, vector<u32> *> file_mapping;
+static unordered_map<string, u32> token_to_id;
+static unordered_map<u32, string> id_to_token;
+static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize);
+static regex regex_comment_star("/\\*(.|\n)*?\\*/",
+ regex::multiline | regex::optimize);
+static regex regex_string("\"(.*?)\"|'(.*?)'", regex::optimize);
+static regex regex_word("[A-Za-z0-9_$]+", regex::optimize);
+static regex regex_whitespace(R"([ \t]+)", regex::optimize);
+static vector<u32> *s;
+
+extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size,
+ u8 **out_buf, uint8_t *add_buf,
+ size_t add_buf_size, size_t max_size) {
+
+ DEBUG(stderr, "MUT!\n");
+
+ if (s == NULL) { return 0; }
+
+ vector<u32> m = *s;
+ u32 i, m_size = (u32)m.size();
+
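+  // Scale the number of mutation rounds to the queue entry's havoc budget
+  // (perf_score), but never run fewer than 8 rounds.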
+  u32 rounds =
+      MAX(8, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score *
+                                  afl_ptr->havoc_div / 256));
+  DEBUG(stderr, "structure size: %lu, rounds: %u\n", m.size(), rounds);
+
+ for (i = 0; i < rounds; ++i) {
+
+ u32 item, new_item;
+
+    switch (rand_below(afl_ptr, 4)) {
+ /* CHANGE */
+ case 0: /* fall through */
+ case 1:
+ item = rand_below(afl_ptr, m_size);
+ do {
+ new_item = 1 + rand_below(afl_ptr, current_id);
+ } while(unlikely(new_item == m[item]));
+ m[item] = new_item;
+ break;
+ /* INSERT (+1 so we insert also after last place) */
+ case 2:
+ new_item = 1 + rand_below(afl_ptr, current_id);
+ m.insert(m.begin() + rand_below(afl_ptr, m_size + 1), new_item);
+ ++m_size;
+ break;
+ /* ERASE - only if large enough */
+      case 3:
+        if (m_size > 8) {
+
+          m.erase(m.begin() + rand_below(afl_ptr, m_size));
+          --m_size;
+
+        }
+
+        break;
+ }
+
+ }
+
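+  // Detokenize: rebuild the test case by joining the mutated token sequence
+  // with single spaces (original whitespace is not preserved yet).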
+ string output;
+ u32 m_size_1 = m_size - 1;
+ for (i = 0; i < m_size; ++i) {
+ output += id_to_token[m[i]];
+ if (likely(i < m_size_1)) { output += " "; }
+ }
+
+  u32 mutated_size = (u32)output.size();
+  if (unlikely(mutated_size > max_size)) { mutated_size = max_size; }
+
+  u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size);
+
+ if (unlikely(!mutated_out)) {
+
+ *out_buf = NULL;
+ return 0;
+
+ }
+
+ memcpy(mutated_out, output.data(), mutated_size);
+ *out_buf = mutated_out;
+  DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size,
+        output.c_str());
+ return mutated_size;
+
+}
+
+
+/* We are not using afl_custom_queue_new_entry() because not every corpus
+   entry will necessarily be fuzzed, so we use afl_custom_queue_get()
+   instead. */
+
+extern "C" unsigned char afl_custom_queue_get(void *data,
+ const unsigned char *filename) {
+
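+  // Only mutate inputs that AFL++ classified as ASCII text; in debug mode
+  // the check is skipped so the mutator can be exercised on any input.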
+ if (likely(!debug))
+ if (!afl_ptr->queue_cur->is_ascii) { s = NULL; return 0; }
+
+ vector<u32> *structure = NULL;
+ string fn = (char *)filename;
+
+ auto entry = file_mapping.find(fn);
+ if (entry == file_mapping.end()) {
+
+ // this input file was not analyzed for tokens yet, so let's do it!
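+    // Steps: read the file, strip comments, split on string literals and
+    // whitespace, extract word tokens, then map every token to a numeric ID.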
+
+ FILE *fp = fopen((char *)filename, "rb");
+ if (!fp) { s = NULL; return 0; } // should not happen
+ fseek(fp, 0, SEEK_END);
+ size_t len = (size_t)ftell(fp);
+ if (len < AUTOTOKENS_LEN_MIN) {
+
+ fclose(fp);
+ file_mapping[fn] = structure; // NULL ptr so we don't read the file again
+ DEBUG(stderr, "Too short (%lu) %s\n", len, filename);
+ s = NULL;
+ return 0;
+
+ }
+
+ string input;
+ input.resize(len);
+ rewind(fp);
+    if (fread(input.data(), input.size(), 1, fp) != 1) {
+
+      fclose(fp);
+      s = NULL;
+      return 0;
+
+    }
+
+ fclose(fp);
+
+ // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n",
+ // input.size(), filename, input.c_str());
+
+ input = regex_replace(input, regex_comment_slash, "$2");
+ input = regex_replace(input, regex_comment_star, "");
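+    // Comments are gone now; the "$2" above preserved the newline that ended
+    // each //-comment so that tokens on adjacent lines do not merge.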
+
+ DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(),
+ filename, input.c_str());
+
+ /*
+ u32 spaces = count(input.begin(), input.end(), ' ');
+ u32 tabs = count(input.begin(), input.end(), '\t');
+ u32 linefeeds = count(input.begin(), input.end(), '\n');
+ bool ends_with_linefeed = input[input.length() - 1] == '\n';
+ DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs,
+ linefeeds, ends_with_linefeed);
+ */
+
+ // now extract all tokens
+ vector<string> tokens;
+ smatch match;
+    string::const_iterator cur = input.begin(), ende = input.end(), found,
+                           prev;
+
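+    // Scan for quoted string literals: the text between two literals is
+    // tokenized separately below, and the literal's contents are kept as a
+    // single token.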
+ DEBUG(stderr, "MATCHES:\n");
+ while (regex_search(cur, ende, match, regex_string)) {
+
+ prev = cur;
+ found = match[1].first;
+ cur = match[1].second;
+ DEBUG(stderr,
+ "string \"%s\" found at start %lu offset %lu continue at %lu\n",
+ match[1].str().c_str(), prev - input.begin(), match.position(),
+ cur - input.begin());
+ if (prev < found) { // there are items between search start and find
+ sregex_token_iterator it{prev, found, regex_whitespace, -1};
+ vector<std::string> tokenized{it, {}};
+ tokenized.erase(
+ std::remove_if(tokenized.begin(), tokenized.end(),
+ [](std::string const &s) { return s.size() == 0; }),
+ tokenized.end());
+ tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
+
+ DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(),
+ input.size());
+      if (unlikely(debug)) {
+
+        for (auto x : tokenized) {
+
+          cerr << x << endl;
+
+        }
+
+      }
+
+ for (auto token : tokenized) {
+
+ string::const_iterator c = token.begin(), e = token.end(), f, p;
+ smatch m;
+
+ while (regex_search(c, e, m, regex_word)) {
+
+ p = c;
+ f = m[0].first;
+ c = m[0].second;
+ if (p < f) {
+
+ // there are items between search start and find
+ string foo(p, f);
+ DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+ tokens.push_back(std::string(p, f));
+
+ }
+
+ DEBUG(stderr,
+ "SUBstring \"%s\" found at start %lu offset %lu continue at "
+ "%lu\n",
+              m[0].str().c_str(), p - token.begin(), m.position(),
+ c - token.begin());
+ tokens.push_back(m[0].str());
+
+ }
+
+ if (c < e) {
+
+ string foo(c, e);
+ DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+ tokens.push_back(std::string(c, e));
+
+ }
+
+ }
+
+ }
+
+ if (match[1].length() > 0) { tokens.push_back(match[1]); }
+
+ }
+
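+    // Tokenize whatever remains after the last string literal in the same
+    // way (this duplicates the loop above for the tail of the input).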
+ if (cur < ende) {
+
+ DEBUG(stderr, "REST!\n");
+
+ sregex_token_iterator it{cur, ende, regex_whitespace, -1};
+ vector<std::string> tokenized{it, {}};
+ tokenized.erase(
+ std::remove_if(tokenized.begin(), tokenized.end(),
+ [](std::string const &s) { return s.size() == 0; }),
+ tokenized.end());
+ tokens.reserve(tokens.size() + tokenized.size() * 2 + 1);
+
+ DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(),
+ input.size());
+    if (unlikely(debug)) {
+
+      for (auto x : tokenized) {
+
+        cerr << x << endl;
+
+      }
+
+    }
+
+ for (auto token : tokenized) {
+
+ string::const_iterator c = token.begin(), e = token.end(), f, p;
+ smatch m;
+
+ while (regex_search(c, e, m, regex_word)) {
+
+ p = c;
+ f = m[0].first;
+ c = m[0].second;
+ if (p < f) {
+
+ // there are items between search start and find
+ string foo(p, f);
+ DEBUG(stderr, "before string: \"%s\"\n", foo.c_str());
+ tokens.push_back(std::string(p, f));
+
+ }
+
+ DEBUG(stderr,
+ "SUB2string \"%s\" found at start %lu offset %lu continue at "
+ "%lu\n",
+            m[0].str().c_str(), p - token.begin(), m.position(),
+ c - token.begin());
+ tokens.push_back(m[0].str());
+
+ }
+
+ if (c < e) {
+
+ string foo(c, e);
+ DEBUG(stderr, "after string: \"%s\"\n", foo.c_str());
+ tokens.push_back(std::string(c, e));
+
+ }
+
+ }
+
+ }
+
+ DEBUG(stderr, "DUMPING TOKENS:\n");
+ if (unlikely(debug))
+ for (u32 i = 0; i < tokens.size(); ++i) {
+
+ DEBUG(stderr, "%s ", tokens[i].c_str());
+
+ }
+
+ DEBUG(stderr, "---------------------------\n");
+
+    /* Now we transform the tokens into an ID list and save it */
+
+ structure = new vector<u32>();
+ u32 id;
+
+ for (u32 i = 0; i < tokens.size(); ++i) {
+
+ if ((id = token_to_id[tokens[i]]) == 0) {
+
+ // First time we see this token, add it to the list
+ ++current_id;
+ token_to_id[tokens[i]] = current_id;
+ id_to_token[current_id] = tokens[i];
+ structure->push_back(current_id);
+
+ } else {
+
+ structure->push_back(id);
+
+ }
+
+ }
+
+ // save the token structure to the file mapping
+ file_mapping[fn] = structure;
+ s = structure;
+
+ // we are done!
+ DEBUG(stderr, "DONE! We have %lu tokens in the structure\n",
+ structure->size());
+
+ } else {
+
+ if (entry->second == NULL) {
+
+ DEBUG(stderr, "Skipping %s\n", filename);
+ s = NULL;
+ return 0;
+
+ }
+
+ s = entry->second;
+ DEBUG(stderr, "OK %s\n", filename);
+
+ }
+
+ return 1; // we always fuzz unless non-ascii or too small
+
+}
+
+extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
+
+ (void)(seed);
+ my_mutator_t *data = (my_mutator_t *)calloc(1, sizeof(my_mutator_t));
+ if (!data) {
+
+ perror("afl_custom_init alloc");
+ return NULL;
+
+ }
+
+ data->afl = afl_ptr = afl;
+
+ return data;
+
+}
+
+extern "C" void afl_custom_deinit(my_mutator_t *data) {
+
+ free(data);
+
+}
+
diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl
-Subproject commit a8af9cbde71e333ce72a46f15e655d0b82ed093
+Subproject commit a120c3feb573d4cade292cdeb7c1f6b1ce109ef