aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--custom_mutators/autotokens/autotokens.cpp97
1 files changed, 78 insertions, 19 deletions
diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp
index 46a347f8..f1263600 100644
--- a/custom_mutators/autotokens/autotokens.cpp
+++ b/custom_mutators/autotokens/autotokens.cpp
@@ -24,10 +24,12 @@ extern "C" {
#define AUTOTOKENS_ONLY_FAV 0
#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0
#define AUTOTOKENS_CHANGE_MIN 8
+#define AUTOTOKENS_CHANGE_MAX 64
#define AUTOTOKENS_WHITESPACE " "
#define AUTOTOKENS_SIZE_MIN 8
#define AUTOTOKENS_SPLICE_MIN 4
#define AUTOTOKENS_SPLICE_MAX 64
+#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1
#define AUTOTOKENS_FUZZ_COUNT_SHIFT 0
// 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog
#define AUTOTOKENS_LEARN_DICT 2
@@ -61,6 +63,7 @@ static int only_fav = AUTOTOKENS_ONLY_FAV;
static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE;
static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT;
static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT;
+static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR;
static u32 current_id;
static u32 valid_structures;
static u32 whitespace_ids;
@@ -83,7 +86,18 @@ static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize);
static regex regex_whitespace(R"([ \t]+)", regex::optimize);
static vector<u32> *s; // the structure of the currently selected input
-u32 good_whitespace_or_singleval() {
+// FUNCTIONS
+
+/* This function is called once after everything is set up but before
+ any fuzzing attempt has been performed.
+ This is called in afl_custom_queue_get() */
+static void first_run(void *data) {
+
+ (void)(data);
+
+}
+
+static u32 good_whitespace_or_singleval() {
u32 i = rand_below(afl_ptr, current_id);
if (id_to_token[i].size() == 1) { return i; }
@@ -105,6 +119,8 @@ u32 good_whitespace_or_singleval() {
extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf,
size_t buf_size) {
+ (void)(data);
+
if (s == NULL) return 0;
u32 shift = unlikely(afl_ptr->custom_only) ? 7 : 8;
@@ -135,9 +151,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
u32 i, m_size = (u32)m.size();
u32 rounds =
- MAX(AUTOTOKENS_CHANGE_MIN,
- MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score *
- afl_ptr->havoc_div / 256));
+ MIN(AUTOTOKENS_CHANGE_MAX,
+ MAX(AUTOTOKENS_CHANGE_MIN,
+ MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score *
+ afl_ptr->havoc_div / 256)));
// DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds);
#if AUTOTOKENS_SPLICE_DISABLE == 1
@@ -379,9 +396,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size,
/* I get f*cking stack overflow using C++ regex with a regex of
"\"[[:print:]]*?\"" if this matches a long string even with regex::optimize
enabled :-( */
-u8 my_search_string(string::const_iterator cur, string::const_iterator ende,
- string::const_iterator *match_begin,
- string::const_iterator *match_end) {
+static u8 my_search_string(string::const_iterator cur,
+ string::const_iterator ende,
+ string::const_iterator *match_begin,
+ string::const_iterator *match_end) {
string::const_iterator start = cur, found_begin;
u8 quote_type = 0;
@@ -460,25 +478,30 @@ u8 my_search_string(string::const_iterator cur, string::const_iterator ende,
}
/* We are not using afl_custom_queue_new_entry() because not every corpus entry
- will be necessarily fuzzed. so we use afl_custom_queue_get() instead */
+ will be necessarily fuzzed with this custom mutator.
+ So we use afl_custom_queue_get() instead. */
extern "C" unsigned char afl_custom_queue_get(void *data,
const unsigned char *filename) {
- static int learn_state;
+ static int learn_state = 0;
+ static int is_first_run = 1;
(void)(data);
- if (likely(!debug)) {
+ if (unlikely(is_first_run)) {
- if (unlikely(!afl_ptr->custom_only) &&
- ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) ||
- (only_fav && !afl_ptr->queue_cur->favored))) {
+ is_first_run = 0;
+ first_run(data);
- s = NULL;
- DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n");
- return 1;
+ }
- }
+ if (unlikely(!afl_ptr->custom_only) && !create_from_thin_air &&
+ ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) ||
+ (only_fav && !afl_ptr->queue_cur->favored))) {
+
+ s = NULL;
+ DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n");
+ return 1;
}
@@ -551,6 +574,42 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
string fn = (char *)filename;
auto entry = file_mapping.find(fn);
+ // if there is only one active queue item at start and it is very small
+ // the we create once a structure randomly.
+ if (unlikely(create_from_thin_air)) {
+
+ if (current_id > whitespace_ids + 6 && afl_ptr->active_items == 1 &&
+ afl_ptr->queue_cur->len < AFL_TXT_MIN_LEN) {
+
+ DEBUGF(stderr, "Creating an entry from thin air...\n");
+ structure = new vector<u32>();
+ u32 item, prev, cnt = current_id >> 1;
+ structure->reserve(cnt + 4);
+ for (u32 i = 0; i < cnt; i++) {
+
+ item = rand_below(afl_ptr, current_id);
+ if (i && id_to_token[item].length() > 1 &&
+ id_to_token[prev].length() > 1) {
+
+ structure->push_back(good_whitespace_or_singleval());
+
+ }
+
+ structure->push_back(item);
+ prev = item;
+
+ }
+
+ file_mapping[fn] = structure;
+ s = structure;
+ return 1;
+
+ }
+
+ create_from_thin_air = 0;
+
+ }
+
if (entry == file_mapping.end()) {
// this input file was not analyzed for tokens yet, so let's do it!
@@ -574,8 +633,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data,
DEBUGF(stderr, "Too short (%lu) %s\n", len, filename);
return 1;
- } else
- if (len > AFL_TXT_MAX_LEN) {
+ } else if (len > AFL_TXT_MAX_LEN) {
fclose(fp);
file_mapping[fn] = structure; // NULL ptr so we don't read the file again
@@ -1088,6 +1146,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) {
if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; }
if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; }
+ if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; }
if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; }
if (getenv("AUTOTOKENS_LEARN_DICT")) {