From 77f253c1b910e5240ac467680ea9e21a1e4e05ef Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 26 Sep 2022 03:43:45 +0200 Subject: fix custom mutator build scripts --- custom_mutators/gramatron/build_gramatron_mutator.sh | 2 +- custom_mutators/gramatron/json-c | 2 +- custom_mutators/grammar_mutator/build_grammar_mutator.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/gramatron/build_gramatron_mutator.sh b/custom_mutators/gramatron/build_gramatron_mutator.sh index 9952e7f5..ff88ff26 100755 --- a/custom_mutators/gramatron/build_gramatron_mutator.sh +++ b/custom_mutators/gramatron/build_gramatron_mutator.sh @@ -125,7 +125,7 @@ else } fi -test -d json-c/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } +test -e json-c/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } echo "[+] Got json-c." test -e json-c/.libs/libjson-c.a || { diff --git a/custom_mutators/gramatron/json-c b/custom_mutators/gramatron/json-c index 11546bfd..af8dd4a3 160000 --- a/custom_mutators/gramatron/json-c +++ b/custom_mutators/gramatron/json-c @@ -1 +1 @@ -Subproject commit 11546bfd07a575c47416924cb98de3d33a4e6424 +Subproject commit af8dd4a307e7b837f9fa2959549548ace4afe08b diff --git a/custom_mutators/grammar_mutator/build_grammar_mutator.sh b/custom_mutators/grammar_mutator/build_grammar_mutator.sh index 5121b07f..74cae8aa 100755 --- a/custom_mutators/grammar_mutator/build_grammar_mutator.sh +++ b/custom_mutators/grammar_mutator/build_grammar_mutator.sh @@ -119,7 +119,7 @@ else } fi -test -f grammar_mutator/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } +test -e grammar_mutator/.git || { echo "[-] not checked out, please install git or check your internet connection." ; exit 1 ; } echo "[+] Got grammar mutator." cd "grammar_mutator" || exit 1 -- cgit v1.2.3 From 9e4137a3ebdda76d7cfa3da03ae2f4b1317be012 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Mon, 3 Oct 2022 22:58:21 +0200 Subject: Enabled tricore arch for unicornafl --- custom_mutators/gramatron/json-c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'custom_mutators') diff --git a/custom_mutators/gramatron/json-c b/custom_mutators/gramatron/json-c index af8dd4a3..11546bfd 160000 --- a/custom_mutators/gramatron/json-c +++ b/custom_mutators/gramatron/json-c @@ -1 +1 @@ -Subproject commit af8dd4a307e7b837f9fa2959549548ace4afe08b +Subproject commit 11546bfd07a575c47416924cb98de3d33a4e6424 -- cgit v1.2.3 From 4a7cd53f64abf4eac311621220afc927574fdcea Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 14 Nov 2022 21:26:17 +0100 Subject: custom_send example --- custom_mutators/examples/custom_send.c | 56 ++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 custom_mutators/examples/custom_send.c (limited to 'custom_mutators') diff --git a/custom_mutators/examples/custom_send.c b/custom_mutators/examples/custom_send.c new file mode 100644 index 00000000..ffea927e --- /dev/null +++ b/custom_mutators/examples/custom_send.c @@ -0,0 +1,56 @@ +// cc -O3 -fPIC -shared -g -o custom_send.so -I../../include custom_send.c +// cd ../.. +// afl-cc -o test-instr test-instr.c +// afl-fuzz -i in -o out -- ./test-instr -f /tmp/foo + +#include "custom_mutator_helpers.h" + +#include +#include +#include +#include +#include + +typedef struct my_mutator { + + afl_t *afl; + +} my_mutator_t; + +my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { + + my_mutator_t *data = calloc(1, sizeof(my_mutator_t)); + if (!data) { + + perror("afl_custom_init alloc"); + return NULL; + + } + + data->afl = afl; + + return data; + +} + +void afl_custom_fuzz_send(my_mutator_t *data, uint8_t *buf, size_t buf_size) { + + int fd = open("/tmp/foo", O_CREAT | O_NOFOLLOW | O_TRUNC | O_RDWR, 0644); + + if (fd >= 0) { + + (void)write(fd, buf, buf_size); + close(fd); + + } + + return; + +} + +void afl_custom_deinit(my_mutator_t *data) { + + free(data); + +} + -- cgit v1.2.3 From 35f09e11a4373b0fb42c690d23127c144f72f73c Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 3 Jan 2023 09:38:00 +0100 Subject: welcome 2023 --- custom_mutators/gramatron/build_gramatron_mutator.sh | 2 +- custom_mutators/grammar_mutator/build_grammar_mutator.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/gramatron/build_gramatron_mutator.sh b/custom_mutators/gramatron/build_gramatron_mutator.sh index ff88ff26..c830329e 100755 --- a/custom_mutators/gramatron/build_gramatron_mutator.sh +++ b/custom_mutators/gramatron/build_gramatron_mutator.sh @@ -11,7 +11,7 @@ # Adapted for AFLplusplus by Dominik Maier # # Copyright 2017 Battelle Memorial Institute. All rights reserved. -# Copyright 2019-2022 AFLplusplus Project. All rights reserved. +# Copyright 2019-2023 AFLplusplus Project. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/custom_mutators/grammar_mutator/build_grammar_mutator.sh b/custom_mutators/grammar_mutator/build_grammar_mutator.sh index 74cae8aa..593cd2dc 100755 --- a/custom_mutators/grammar_mutator/build_grammar_mutator.sh +++ b/custom_mutators/grammar_mutator/build_grammar_mutator.sh @@ -14,7 +14,7 @@ # # # Copyright 2017 Battelle Memorial Institute. All rights reserved. -# Copyright 2019-2022 AFLplusplus Project. All rights reserved. +# Copyright 2019-2023 AFLplusplus Project. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. -- cgit v1.2.3 From aa39921e49f9bd20a4cade0ba76688fc31f35b12 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Thu, 5 Jan 2023 11:47:25 +0000 Subject: Update LibAFL custom mutator to latest --- custom_mutators/libafl_base/Cargo.toml | 2 +- custom_mutators/libafl_base/src/lib.rs | 29 ++++++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/libafl_base/Cargo.toml b/custom_mutators/libafl_base/Cargo.toml index 6e40fc39..ac6b0c8f 100644 --- a/custom_mutators/libafl_base/Cargo.toml +++ b/custom_mutators/libafl_base/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -libafl = { git = "https://github.com/AFLplusplus/LibAFL.git", rev = "62614ce1016c86e3f00f35b56399292ceabd486b" } +libafl = { git = "https://github.com/AFLplusplus/LibAFL.git", rev = "266677bb88abe75165430f34e7de897c35560504" } custom_mutator = { path = "../rust/custom_mutator", features = ["afl_internals"] } serde = { version = "1.0", default-features = false, features = ["alloc"] } # serialization lib diff --git a/custom_mutators/libafl_base/src/lib.rs b/custom_mutators/libafl_base/src/lib.rs index 6f2db8ca..dc1c5e0c 100644 --- a/custom_mutators/libafl_base/src/lib.rs +++ b/custom_mutators/libafl_base/src/lib.rs @@ -18,10 +18,12 @@ use libafl::{ scheduled::{havoc_mutations, tokens_mutations, StdScheduledMutator, Tokens}, Mutator, }, - state::{HasCorpus, HasMaxSize, HasMetadata, HasRand, State}, + prelude::UsesInput, + state::{HasCorpus, HasMaxSize, HasMetadata, HasRand, State, UsesState}, Error, }; +#[allow(clippy::identity_op)] const MAX_FILE: usize = 1 * 1024 * 1024; static mut AFL: Option<&'static afl_state> = None; @@ -64,7 +66,11 @@ impl<'de> Deserialize<'de> for AFLCorpus { } } -impl Corpus for AFLCorpus { +impl UsesState for AFLCorpus { + type State = AFLState; +} + +impl Corpus for AFLCorpus { #[inline] fn count(&self) -> usize { afl().queued_items as usize @@ -76,7 +82,11 @@ impl Corpus for AFLCorpus { } #[inline] - fn replace(&mut self, idx: usize, testcase: Testcase) -> Result<(), Error> { + fn replace( + &mut self, + idx: usize, + testcase: Testcase, + ) -> Result, Error> { unimplemented!(); } @@ -92,7 +102,7 @@ impl Corpus for AFLCorpus { entries.entry(idx).or_insert_with(|| { let queue_buf = std::slice::from_raw_parts_mut(afl().queue_buf, self.count()); let entry = queue_buf[idx].as_mut().unwrap(); - let fname = CStr::from_ptr((entry.fname as *mut i8).as_ref().unwrap()) + let fname = CStr::from_ptr((entry.fname.cast::()).as_ref().unwrap()) .to_str() .unwrap() .to_owned(); @@ -127,9 +137,10 @@ pub struct AFLState { } impl AFLState { + #[must_use] pub fn new(seed: u32) -> Self { Self { - rand: StdRand::with_seed(seed as u64), + rand: StdRand::with_seed(u64::from(seed)), corpus: AFLCorpus::default(), metadata: SerdeAnyMap::new(), max_size: MAX_FILE, @@ -153,7 +164,11 @@ impl HasRand for AFLState { } } -impl HasCorpus for AFLState { +impl UsesInput for AFLState { + type Input = BytesInput; +} + +impl HasCorpus for AFLState { type Corpus = AFLCorpus; #[inline] @@ -208,7 +223,7 @@ impl CustomMutator for LibAFLBaseCustomMutator { tokens.push(data.to_vec()); } if !tokens.is_empty() { - state.add_metadata(Tokens::new(tokens)); + state.add_metadata(Tokens::from(tokens)); } Ok(Self { state, -- cgit v1.2.3 From a8b6365a90e09a635907f0c257667e505255910a Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Thu, 5 Jan 2023 11:49:58 +0000 Subject: LibAFL custom mutator: unused variables with underscores --- custom_mutators/libafl_base/src/lib.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/libafl_base/src/lib.rs b/custom_mutators/libafl_base/src/lib.rs index dc1c5e0c..bae11e1f 100644 --- a/custom_mutators/libafl_base/src/lib.rs +++ b/custom_mutators/libafl_base/src/lib.rs @@ -1,5 +1,4 @@ #![cfg(unix)] -#![allow(unused_variables)] use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::{ @@ -77,21 +76,21 @@ impl Corpus for AFLCorpus { } #[inline] - fn add(&mut self, testcase: Testcase) -> Result { + fn add(&mut self, _testcase: Testcase) -> Result { unimplemented!(); } #[inline] fn replace( &mut self, - idx: usize, - testcase: Testcase, + _idx: usize, + _testcase: Testcase, ) -> Result, Error> { unimplemented!(); } #[inline] - fn remove(&mut self, idx: usize) -> Result>, Error> { + fn remove(&mut self, _idx: usize) -> Result>, Error> { unimplemented!(); } @@ -235,7 +234,7 @@ impl CustomMutator for LibAFLBaseCustomMutator { fn fuzz<'b, 's: 'b>( &'s mut self, buffer: &'b mut [u8], - add_buff: Option<&[u8]>, + _add_buff: Option<&[u8]>, max_size: usize, ) -> Result, Self::Error> { self.state.set_max_size(max_size); -- cgit v1.2.3 From 462e55da0cf2eb572b93f65d1190fdaac874e25c Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Thu, 5 Jan 2023 12:12:01 +0000 Subject: Updated rust custom mutator bindgen, fixed clippy lints --- custom_mutators/rust/custom_mutator-sys/Cargo.toml | 6 +- custom_mutators/rust/custom_mutator-sys/build.rs | 4 +- custom_mutators/rust/custom_mutator-sys/src/lib.rs | 2 + custom_mutators/rust/custom_mutator/Cargo.toml | 2 +- custom_mutators/rust/custom_mutator/src/lib.rs | 98 +++++++++++----------- custom_mutators/rust/example/Cargo.toml | 2 +- 6 files changed, 57 insertions(+), 57 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/rust/custom_mutator-sys/Cargo.toml b/custom_mutators/rust/custom_mutator-sys/Cargo.toml index 104f7df0..e38c972e 100644 --- a/custom_mutators/rust/custom_mutator-sys/Cargo.toml +++ b/custom_mutators/rust/custom_mutator-sys/Cargo.toml @@ -1,12 +1,12 @@ [package] name = "custom_mutator-sys" -version = "0.1.0" +version = "0.1.1" authors = ["Julius Hohnerlein "] -edition = "2018" +edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] [build-dependencies] -bindgen = "0.56" +bindgen = "0.63" diff --git a/custom_mutators/rust/custom_mutator-sys/build.rs b/custom_mutators/rust/custom_mutator-sys/build.rs index 3c88a90d..ba4390ff 100644 --- a/custom_mutators/rust/custom_mutator-sys/build.rs +++ b/custom_mutators/rust/custom_mutator-sys/build.rs @@ -15,8 +15,8 @@ fn main() { // The input header we would like to generate // bindings for. .header("wrapper.h") - .whitelist_type("afl_state_t") - .blacklist_type(r"u\d+") + .allowlist_type("afl_state_t") + .blocklist_type(r"u\d+") .opaque_type(r"_.*") .opaque_type("FILE") .opaque_type("in_addr(_t)?") diff --git a/custom_mutators/rust/custom_mutator-sys/src/lib.rs b/custom_mutators/rust/custom_mutator-sys/src/lib.rs index a38a13a8..719ac994 100644 --- a/custom_mutators/rust/custom_mutator-sys/src/lib.rs +++ b/custom_mutators/rust/custom_mutator-sys/src/lib.rs @@ -1,5 +1,7 @@ #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] #![allow(non_snake_case)] +#![allow(clippy::too_many_lines)] +#![allow(clippy::used_underscore_binding)] include!(concat!(env!("OUT_DIR"), "/bindings.rs")); diff --git a/custom_mutators/rust/custom_mutator/Cargo.toml b/custom_mutators/rust/custom_mutator/Cargo.toml index 2d3cdbfa..30f764dc 100644 --- a/custom_mutators/rust/custom_mutator/Cargo.toml +++ b/custom_mutators/rust/custom_mutator/Cargo.toml @@ -2,7 +2,7 @@ name = "custom_mutator" version = "0.1.0" authors = ["Julius Hohnerlein "] -edition = "2018" +edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/custom_mutators/rust/custom_mutator/src/lib.rs b/custom_mutators/rust/custom_mutator/src/lib.rs index f872241e..09ac11f3 100644 --- a/custom_mutators/rust/custom_mutator/src/lib.rs +++ b/custom_mutators/rust/custom_mutator/src/lib.rs @@ -20,7 +20,7 @@ //! This binding is panic-safe in that it will prevent panics from unwinding into AFL++. Any panic will `abort` at the boundary between the custom mutator and AFL++. //! //! # Access to AFL++ internals -//! This crate has an optional feature "afl_internals", which gives access to AFL++'s internal state. +//! This crate has an optional feature "`afl_internals`", which gives access to AFL++'s internal state. //! The state is passed to [`CustomMutator::init`], when the feature is activated. //! //! _This is completely unsafe and uses automatically generated types extracted from the AFL++ source._ @@ -115,7 +115,7 @@ pub mod wrappers { impl FFIContext { fn from(ptr: *mut c_void) -> ManuallyDrop> { assert!(!ptr.is_null()); - ManuallyDrop::new(unsafe { Box::from_raw(ptr as *mut Self) }) + ManuallyDrop::new(unsafe { Box::from_raw(ptr.cast::()) }) } fn into_ptr(self: Box) -> *const c_void { @@ -141,27 +141,28 @@ pub mod wrappers { } /// panic handler called for every panic - fn panic_handler(method: &str, panic_info: Box) -> ! { + fn panic_handler(method: &str, panic_info: &Box) -> ! { use std::ops::Deref; - let cause = panic_info - .downcast_ref::() - .map(String::deref) - .unwrap_or_else(|| { + let cause = panic_info.downcast_ref::().map_or_else( + || { panic_info .downcast_ref::<&str>() .copied() .unwrap_or("") - }); - eprintln!("A panic occurred at {}: {}", method, cause); + }, + String::deref, + ); + eprintln!("A panic occurred at {method}: {cause}"); abort() } /// Internal function used in the macro #[cfg(not(feature = "afl_internals"))] + #[must_use] pub fn afl_custom_init_(seed: u32) -> *const c_void { match catch_unwind(|| FFIContext::::new(seed).into_ptr()) { Ok(ret) => ret, - Err(err) => panic_handler("afl_custom_init", err), + Err(err) => panic_handler("afl_custom_init", &err), } } @@ -176,7 +177,7 @@ pub mod wrappers { FFIContext::::new(afl, seed).into_ptr() }) { Ok(ret) => ret, - Err(err) => panic_handler("afl_custom_init", err), + Err(err) => panic_handler("afl_custom_init", &err), } } @@ -196,32 +197,27 @@ pub mod wrappers { ) -> usize { match catch_unwind(|| { let mut context = FFIContext::::from(data); - if buf.is_null() { - panic!("null buf passed to afl_custom_fuzz") - } - if out_buf.is_null() { - panic!("null out_buf passed to afl_custom_fuzz") - } + + assert!(!buf.is_null(), "null buf passed to afl_custom_fuzz"); + assert!(!out_buf.is_null(), "null out_buf passed to afl_custom_fuzz"); + let buff_slice = slice::from_raw_parts_mut(buf, buf_size); let add_buff_slice = if add_buf.is_null() { None } else { Some(slice::from_raw_parts(add_buf, add_buf_size)) }; - match context.mutator.fuzz(buff_slice, add_buff_slice, max_size) { - Some(buffer) => { - *out_buf = buffer.as_ptr(); - buffer.len() - } - None => { - // return the input buffer with 0-length to let AFL skip this mutation attempt - *out_buf = buf; - 0 - } + if let Some(buffer) = context.mutator.fuzz(buff_slice, add_buff_slice, max_size) { + *out_buf = buffer.as_ptr(); + buffer.len() + } else { + // return the input buffer with 0-length to let AFL skip this mutation attempt + *out_buf = buf; + 0 } }) { Ok(ret) => ret, - Err(err) => panic_handler("afl_custom_fuzz", err), + Err(err) => panic_handler("afl_custom_fuzz", &err), } } @@ -237,9 +233,8 @@ pub mod wrappers { ) -> u32 { match catch_unwind(|| { let mut context = FFIContext::::from(data); - if buf.is_null() { - panic!("null buf passed to afl_custom_fuzz") - } + assert!(!buf.is_null(), "null buf passed to afl_custom_fuzz"); + let buf_slice = slice::from_raw_parts(buf, buf_size); // see https://doc.rust-lang.org/nomicon/borrow-splitting.html let ctx = &mut **context; @@ -247,7 +242,7 @@ pub mod wrappers { mutator.fuzz_count(buf_slice) }) { Ok(ret) => ret, - Err(err) => panic_handler("afl_custom_fuzz_count", err), + Err(err) => panic_handler("afl_custom_fuzz_count", &err), } } @@ -259,25 +254,27 @@ pub mod wrappers { ) -> bool { match catch_unwind(|| { let mut context = FFIContext::::from(data); - if filename_new_queue.is_null() { - panic!("received null filename_new_queue in afl_custom_queue_new_entry"); - } + assert!( + !filename_new_queue.is_null(), + "received null filename_new_queue in afl_custom_queue_new_entry" + ); + let filename_new_queue = Path::new(OsStr::from_bytes( unsafe { CStr::from_ptr(filename_new_queue) }.to_bytes(), )); - let filename_orig_queue = if !filename_orig_queue.is_null() { + let filename_orig_queue = if filename_orig_queue.is_null() { + None + } else { Some(Path::new(OsStr::from_bytes( unsafe { CStr::from_ptr(filename_orig_queue) }.to_bytes(), ))) - } else { - None }; context .mutator .queue_new_entry(filename_new_queue, filename_orig_queue) }) { Ok(ret) => ret, - Err(err) => panic_handler("afl_custom_queue_new_entry", err), + Err(err) => panic_handler("afl_custom_queue_new_entry", &err), } } @@ -292,7 +289,7 @@ pub mod wrappers { ManuallyDrop::into_inner(FFIContext::::from(data)); }) { Ok(ret) => ret, - Err(err) => panic_handler("afl_custom_deinit", err), + Err(err) => panic_handler("afl_custom_deinit", &err), } } @@ -306,13 +303,13 @@ pub mod wrappers { buf.extend_from_slice(res.as_bytes()); buf.push(0); // unwrapping here, as the error case should be extremely rare - CStr::from_bytes_with_nul(&buf).unwrap().as_ptr() + CStr::from_bytes_with_nul(buf).unwrap().as_ptr() } else { null() } }) { Ok(ret) => ret, - Err(err) => panic_handler("afl_custom_introspection", err), + Err(err) => panic_handler("afl_custom_introspection", &err), } } @@ -329,13 +326,13 @@ pub mod wrappers { buf.extend_from_slice(res.as_bytes()); buf.push(0); // unwrapping here, as the error case should be extremely rare - CStr::from_bytes_with_nul(&buf).unwrap().as_ptr() + CStr::from_bytes_with_nul(buf).unwrap().as_ptr() } else { null() } }) { Ok(ret) => ret, - Err(err) => panic_handler("afl_custom_describe", err), + Err(err) => panic_handler("afl_custom_describe", &err), } } @@ -348,12 +345,12 @@ pub mod wrappers { let mut context = FFIContext::::from(data); assert!(!filename.is_null()); - context.mutator.queue_get(Path::new(OsStr::from_bytes( + u8::from(context.mutator.queue_get(Path::new(OsStr::from_bytes( unsafe { CStr::from_ptr(filename) }.to_bytes(), - ))) as u8 + )))) }) { Ok(ret) => ret, - Err(err) => panic_handler("afl_custom_queue_get", err), + Err(err) => panic_handler("afl_custom_queue_get", &err), } } } @@ -373,7 +370,7 @@ macro_rules! _define_afl_custom_init { }; } -/// An exported macro to defined afl_custom_init meant for insternal usage +/// An exported macro to defined `afl_custom_init` meant for internal usage #[cfg(not(feature = "afl_internals"))] #[macro_export] macro_rules! _define_afl_custom_init { @@ -520,9 +517,10 @@ mod sanity_test { export_mutator!(ExampleMutator); } -#[allow(unused_variables)] /// A custom mutator. /// [`CustomMutator::handle_error`] will be called in case any method returns an [`Result::Err`]. +#[allow(unused_variables)] +#[allow(clippy::missing_errors_doc)] pub trait CustomMutator { /// The error type. All methods must return the same error type. type Error: Debug; @@ -537,7 +535,7 @@ pub trait CustomMutator { .map(|v| !v.is_empty()) .unwrap_or(false) { - eprintln!("Error in custom mutator: {:?}", err) + eprintln!("Error in custom mutator: {err:?}"); } } diff --git a/custom_mutators/rust/example/Cargo.toml b/custom_mutators/rust/example/Cargo.toml index 070d23b1..9d53ebe5 100644 --- a/custom_mutators/rust/example/Cargo.toml +++ b/custom_mutators/rust/example/Cargo.toml @@ -2,7 +2,7 @@ name = "example_mutator" version = "0.1.0" authors = ["Julius Hohnerlein "] -edition = "2018" +edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -- cgit v1.2.3 From a3b56e7280cb5b5cea21c66c40d4390db6f13b8f Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Thu, 5 Jan 2023 12:25:02 +0000 Subject: rust custom mutator: mark external fns unsafe --- custom_mutators/rust/custom_mutator/src/lib.rs | 11 +++++------ custom_mutators/rust/example_lain/Cargo.toml | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/rust/custom_mutator/src/lib.rs b/custom_mutators/rust/custom_mutator/src/lib.rs index 09ac11f3..3b635eb5 100644 --- a/custom_mutators/rust/custom_mutator/src/lib.rs +++ b/custom_mutators/rust/custom_mutator/src/lib.rs @@ -247,7 +247,7 @@ pub mod wrappers { } /// Internal function used in the macro - pub fn afl_custom_queue_new_entry_( + pub unsafe fn afl_custom_queue_new_entry_( data: *mut c_void, filename_new_queue: *const c_char, filename_orig_queue: *const c_char, @@ -337,7 +337,7 @@ pub mod wrappers { } /// Internal function used in the macro - pub fn afl_custom_queue_get_( + pub unsafe fn afl_custom_queue_get_( data: *mut c_void, filename: *const c_char, ) -> u8 { @@ -441,7 +441,7 @@ macro_rules! export_mutator { } #[no_mangle] - pub extern "C" fn afl_custom_queue_new_entry( + pub unsafe extern "C" fn afl_custom_queue_new_entry( data: *mut ::std::os::raw::c_void, filename_new_queue: *const ::std::os::raw::c_char, filename_orig_queue: *const ::std::os::raw::c_char, @@ -454,7 +454,7 @@ macro_rules! export_mutator { } #[no_mangle] - pub extern "C" fn afl_custom_queue_get( + pub unsafe extern "C" fn afl_custom_queue_get( data: *mut ::std::os::raw::c_void, filename: *const ::std::os::raw::c_char, ) -> u8 { @@ -757,8 +757,7 @@ mod truncate_test { let actual_output = truncate_str_unicode_safe(input, *max_len); assert_eq!( &actual_output, expected_output, - "{:#?} truncated to {} bytes should be {:#?}, but is {:#?}", - input, max_len, expected_output, actual_output + "{input:#?} truncated to {max_len} bytes should be {expected_output:#?}, but is {actual_output:#?}" ); } } diff --git a/custom_mutators/rust/example_lain/Cargo.toml b/custom_mutators/rust/example_lain/Cargo.toml index 29d606a4..c52bf86f 100644 --- a/custom_mutators/rust/example_lain/Cargo.toml +++ b/custom_mutators/rust/example_lain/Cargo.toml @@ -2,7 +2,7 @@ name = "example_lain" version = "0.1.0" authors = ["Julius Hohnerlein "] -edition = "2018" +edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -- cgit v1.2.3 From e3dadbfe0f9fad435a6fa201131315500f1a348a Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 13 Jan 2023 18:27:22 +0100 Subject: autotokens --- custom_mutators/autotokens/Makefile | 7 + custom_mutators/autotokens/autotokens.cpp | 391 ++++++++++++++++++++++++++++++ 2 files changed, 398 insertions(+) create mode 100644 custom_mutators/autotokens/Makefile create mode 100644 custom_mutators/autotokens/autotokens.cpp (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile new file mode 100644 index 00000000..1ee7f5c4 --- /dev/null +++ b/custom_mutators/autotokens/Makefile @@ -0,0 +1,7 @@ +all: autotokens.so + +autotokens.so: autotokens.cpp + $(CXX) -O3 -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o + +clean: + rm -f autotokens.so *~ core \ No newline at end of file diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp new file mode 100644 index 00000000..afde8c26 --- /dev/null +++ b/custom_mutators/autotokens/autotokens.cpp @@ -0,0 +1,391 @@ +extern "C" { +#include "afl-fuzz.h" +} + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define AUTOTOKENS_DEBUG 1 +#define AUTOTOKENS_LEN_MIN 12 +#define AUTOTOKENS_CHANGE_MIN_PERCENT 5 +#define AUTOTOKENS_CHANGE_MAX_PERCENT 10 + +using namespace std; + +typedef struct my_mutator { + + afl_state *afl; + +} my_mutator_t; + +#define DEBUG \ + if (unlikely(debug)) fprintf + +static afl_state *afl_ptr; +static int debug = AUTOTOKENS_DEBUG; +static u32 current_id = 0; +static unordered_map *> file_mapping; +static unordered_map token_to_id; +static unordered_map id_to_token; +static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*(.|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); +static vector *s; + +extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, + u8 **out_buf, uint8_t *add_buf, + size_t add_buf_size, size_t max_size) { + + DEBUG(stderr, "MUT!\n"); + + if (s == NULL) { return 0; } + + vector m = *s; + u32 i, m_size = (u32)m.size(); + + u32 rounds = MAX(8, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); + DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + + for (i = 0; i < rounds; ++i) { + + u32 item, new_item; + + switch(rand_below(afl_ptr, 4)) { + /* CHANGE */ + case 0: /* fall through */ + case 1: + item = rand_below(afl_ptr, m_size); + do { + new_item = 1 + rand_below(afl_ptr, current_id); + } while(unlikely(new_item == m[item])); + m[item] = new_item; + break; + /* INSERT (+1 so we insert also after last place) */ + case 2: + new_item = 1 + rand_below(afl_ptr, current_id); + m.insert(m.begin() + rand_below(afl_ptr, m_size + 1), new_item); + ++m_size; + break; + /* ERASE - only if large enough */ + case 3: + if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } + --m_size; + break; + } + + } + + string output; + u32 m_size_1 = m_size - 1; + for (i = 0; i < m_size; ++i) { + output += id_to_token[m[i]]; + if (likely(i < m_size_1)) { output += " "; } + } + + u32 mutated_size = output.size(); + u8 *mutated_out = (u8*)afl_realloc((void**)out_buf, mutated_size); + + if (unlikely(!mutated_out)) { + + *out_buf = NULL; + return 0; + + } + + /* + *out_buf = buf; + return buf_size; + */ + memcpy(mutated_out, output.data(), mutated_size); + *out_buf = mutated_out; + DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); + return mutated_size; + +} + + +/* We are not using afl_custom_queue_new_entry() because not every corpus entry + will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ + +extern "C" unsigned char afl_custom_queue_get(void *data, + const unsigned char *filename) { + + if (likely(!debug)) + if (!afl_ptr->queue_cur->is_ascii) { s = NULL; return 0; } + + vector *structure = NULL; + string fn = (char *)filename; + + auto entry = file_mapping.find(fn); + if (entry == file_mapping.end()) { + + // this input file was not analyzed for tokens yet, so let's do it! + + FILE *fp = fopen((char *)filename, "rb"); + if (!fp) { s = NULL; return 0; } // should not happen + fseek(fp, 0, SEEK_END); + size_t len = (size_t)ftell(fp); + if (len < AUTOTOKENS_LEN_MIN) { + + fclose(fp); + file_mapping[fn] = structure; // NULL ptr so we don't read the file again + DEBUG(stderr, "Too short (%lu) %s\n", len, filename); + s = NULL; + return 0; + + } + + string input; + input.resize(len); + rewind(fp); + fread(input.data(), input.size(), 1, fp); + fclose(fp); + + // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", + // input.size(), filename, input.c_str()); + + input = regex_replace(input, regex_comment_slash, "$2"); + input = regex_replace(input, regex_comment_star, ""); + + DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), + filename, input.c_str()); + + /* + u32 spaces = count(input.begin(), input.end(), ' '); + u32 tabs = count(input.begin(), input.end(), '\t'); + u32 linefeeds = count(input.begin(), input.end(), '\n'); + bool ends_with_linefeed = input[input.length() - 1] == '\n'; + DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, + linefeeds, ends_with_linefeed); + */ + + // now extract all tokens + vector tokens; + smatch match; + string::const_iterator cur = input.begin(), ende = input.end(), last = cur, + found, prev; + + DEBUG(stderr, "MATCHES:\n"); + while (regex_search(cur, ende, match, regex_string)) { + + prev = cur; + found = match[1].first; + cur = match[1].second; + DEBUG(stderr, + "string \"%s\" found at start %lu offset %lu continue at %lu\n", + match[1].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); + if (prev < found) { // there are items between search start and find + sregex_token_iterator it{prev, found, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase( + std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { return s.size() == 0; }), + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } + + for (auto token : tokenized) { + + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; + + while (regex_search(c, e, m, regex_word)) { + + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } + + DEBUG(stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(c, e)); + + } + + } + + } + + if (match[1].length() > 0) { tokens.push_back(match[1]); } + + } + + if (cur < ende) { + + DEBUG(stderr, "REST!\n"); + + sregex_token_iterator it{cur, ende, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase( + std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { return s.size() == 0; }), + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } + + for (auto token : tokenized) { + + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; + + while (regex_search(c, e, m, regex_word)) { + + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } + + DEBUG(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(c, e)); + + } + + } + + } + + DEBUG(stderr, "DUMPING TOKENS:\n"); + if (unlikely(debug)) + for (u32 i = 0; i < tokens.size(); ++i) { + + DEBUG(stderr, "%s ", tokens[i].c_str()); + + } + + DEBUG(stderr, "---------------------------\n"); + + /* Now we transform the tokens into an ID list and saved that */ + + structure = new vector(); + u32 id; + + for (u32 i = 0; i < tokens.size(); ++i) { + + if ((id = token_to_id[tokens[i]]) == 0) { + + // First time we see this token, add it to the list + ++current_id; + token_to_id[tokens[i]] = current_id; + id_to_token[current_id] = tokens[i]; + structure->push_back(current_id); + + } else { + + structure->push_back(id); + + } + + } + + // save the token structure to the file mapping + file_mapping[fn] = structure; + s = structure; + + // we are done! + DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", + structure->size()); + + } else { + + if (entry->second == NULL) { + + DEBUG(stderr, "Skipping %s\n", filename); + s = NULL; + return 0; + + } + + s = entry->second; + DEBUG(stderr, "OK %s\n", filename); + + } + + return 1; // we always fuzz unless non-ascii or too small + +} + +extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { + + (void)(seed); + my_mutator_t *data = (my_mutator_t *)calloc(1, sizeof(my_mutator_t)); + if (!data) { + + perror("afl_custom_init alloc"); + return NULL; + + } + + data->afl = afl_ptr = afl; + + return data; + +} + +extern "C" void afl_custom_deinit(my_mutator_t *data) { + + free(data); + +} + -- cgit v1.2.3 From 9548af52b266ecc2aed81f388f7a1a7a3fcfb181 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 14 Jan 2023 09:30:25 +0100 Subject: texts --- custom_mutators/autotokens/README | 12 ++++++++++++ custom_mutators/autotokens/TODO | 13 +++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 custom_mutators/autotokens/README create mode 100644 custom_mutators/autotokens/TODO (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README new file mode 100644 index 00000000..6849279e --- /dev/null +++ b/custom_mutators/autotokens/README @@ -0,0 +1,12 @@ +# autotokens + +This implements an improved autotoken idea presented in +[Token-Level Fuzzing][https://www.usenix.org/system/files/sec21-salls.pdf]. +It is a grammar fuzzer without actually knowing the grammar. + +It is recommended to run with together in an instance with `CMPLOG`. + +If you have a dictionary (`-x`) this improves this custom grammar mutator. + +If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`, +to concentrate on grammar bug classes. diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO new file mode 100644 index 00000000..700b3fa7 --- /dev/null +++ b/custom_mutators/autotokens/TODO @@ -0,0 +1,13 @@ +whitespace belassen oder notieren? MAYBE +0=space 1=tab 2=linefeed + +dictionary mitverwenden? JA aber nur ascii +-> neue liste? +wie mache ich das bei honggfuzz? +ansonsten neuer custom mutator entrypoint? + +nur is_ascii wenn cmplog aktiv, ansonsten eigene implementierung +die aber dann dafür sorgt dass eine leere struktur da ist. +is is_ascii in afl-common.o ? + +cmplog: only add tokens that were found to fit? -- cgit v1.2.3 From 35801bed7a5feb8cc3a363bafbd577f256c467f6 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 15 Jan 2023 13:47:31 +0100 Subject: dictionary support --- custom_mutators/autotokens/TODO | 15 +- custom_mutators/autotokens/autotokens.cpp | 248 +++++++++++++++++++++++------- 2 files changed, 197 insertions(+), 66 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 700b3fa7..2e5e384f 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,13 +1,12 @@ whitespace belassen oder notieren? MAYBE 0=space 1=tab 2=linefeed -dictionary mitverwenden? JA aber nur ascii --> neue liste? -wie mache ich das bei honggfuzz? -ansonsten neuer custom mutator entrypoint? +cmplog: only add tokens that were found to fit? + +create from thin air if no good seed after a cycle and dict large enough? +(static u32 no_of_struct_inputs;) + +splice insert, splice overwrite +(linefeed, semicolon) -nur is_ascii wenn cmplog aktiv, ansonsten eigene implementierung -die aber dann dafür sorgt dass eine leere struktur da ist. -is is_ascii in afl-common.o ? -cmplog: only add tokens that were found to fit? diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index afde8c26..2fad8dd7 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1,5 +1,7 @@ extern "C" { + #include "afl-fuzz.h" + } #include @@ -13,9 +15,7 @@ extern "C" { #include #define AUTOTOKENS_DEBUG 1 -#define AUTOTOKENS_LEN_MIN 12 -#define AUTOTOKENS_CHANGE_MIN_PERCENT 5 -#define AUTOTOKENS_CHANGE_MAX_PERCENT 10 +#define AUTOTOKENS_CHANGE_MIN 8 using namespace std; @@ -31,43 +31,55 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static u32 current_id = 0; +static u32 valid_structures = 0; +static u32 extras_cnt = 0, a_extras_cnt = 0; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); -static regex regex_comment_star("/\\*(.|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); -static vector *s; +static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*(.|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); +static vector *s; // the structure of the currently selected input -extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, - u8 **out_buf, uint8_t *add_buf, - size_t add_buf_size, size_t max_size) { +extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, + u8 **out_buf, u8 *add_buf, + size_t add_buf_size, size_t max_size) { - DEBUG(stderr, "MUT!\n"); + if (s == NULL) { + + *out_buf = NULL; + return 0; - if (s == NULL) { return 0; } + } - vector m = *s; - u32 i, m_size = (u32)m.size(); + vector m = *s; // copy of the structure we will modify + u32 i, m_size = (u32)m.size(); - u32 rounds = MAX(8, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); - DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + u32 rounds = + MAX(AUTOTOKENS_CHANGE_MIN, + MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * + afl_ptr->havoc_div / 256)); + // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); for (i = 0; i < rounds; ++i) { - + u32 item, new_item; - - switch(rand_below(afl_ptr, 4)) { + + switch (rand_below(afl_ptr, 4)) { + /* CHANGE */ - case 0: /* fall through */ + case 0: /* fall through */ case 1: item = rand_below(afl_ptr, m_size); do { + new_item = 1 + rand_below(afl_ptr, current_id); - } while(unlikely(new_item == m[item])); + + } while (unlikely(new_item == m[item])); + m[item] = new_item; break; /* INSERT (+1 so we insert also after last place) */ @@ -81,31 +93,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_s if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } --m_size; break; + // TODO: add full line insert splice, replace splace, delete + } - + } - + string output; - u32 m_size_1 = m_size - 1; + u32 m_size_1 = m_size - 1; + for (i = 0; i < m_size; ++i) { + output += id_to_token[m[i]]; if (likely(i < m_size_1)) { output += " "; } + } u32 mutated_size = output.size(); - u8 *mutated_out = (u8*)afl_realloc((void**)out_buf, mutated_size); + u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size); if (unlikely(!mutated_out)) { - + *out_buf = NULL; return 0; - + } - /* - *out_buf = buf; - return buf_size; - */ memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); @@ -113,29 +126,106 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_s } - /* We are not using afl_custom_queue_new_entry() because not every corpus entry will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { - if (likely(!debug)) - if (!afl_ptr->queue_cur->is_ascii) { s = NULL; return 0; } + if (likely(!debug)) { + + if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) { + + s = NULL; + return 0; + + } + + } + + // check if there are new dictionary entries and add them to the tokens + if (valid_structures) { + + while (extras_cnt < afl_ptr->extras_cnt) { + + u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; + u8 *ptr = afl_ptr->extras[extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + ++current_id; + token_to_id[(char *)ptr] = current_id; + id_to_token[current_id] = (char *)ptr; + + } + + ++extras_cnt; + DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr); + + } + + while (a_extras_cnt < afl_ptr->a_extras_cnt) { + + u32 ok = 1, l = afl_ptr->a_extras[a_extras_cnt].len; + u8 *ptr = afl_ptr->a_extras[a_extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + ++current_id; + token_to_id[(char *)ptr] = current_id; + id_to_token[current_id] = (char *)ptr; + + } + + ++a_extras_cnt; + DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr); + + } + + } vector *structure = NULL; string fn = (char *)filename; + auto entry = file_mapping.find(fn); - auto entry = file_mapping.find(fn); if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! FILE *fp = fopen((char *)filename, "rb"); - if (!fp) { s = NULL; return 0; } // should not happen + if (!fp) { + + s = NULL; + return 0; + + } // should not happen + fseek(fp, 0, SEEK_END); size_t len = (size_t)ftell(fp); - if (len < AUTOTOKENS_LEN_MIN) { + + if (len < AFL_TXT_MIN_LEN) { fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again @@ -151,6 +241,30 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fread(input.data(), input.size(), 1, fp); fclose(fp); + if (!afl_ptr->shm.cmplog_mode) { + + // not running with CMPLOG? bad choice, but whatever ... + // we only want text inputs, so we have to check it ourselves. + + u32 valid_chars = 0; + for (u32 i = 0; i < len; ++i) { + + if (isascii((int)input[i]) || isprint((int)input[i])) { ++valid_chars; } + + } + + // we want at least 95% of text characters ... + if (((len * AFL_TXT_MIN_PERCENT) / 100) > valid_chars) { + + file_mapping[fn] = NULL; + DEBUG(stderr, "Not text (%lu) %s\n", len, filename); + s = NULL; + return 0; + + } + + } + // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); @@ -175,7 +289,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator cur = input.begin(), ende = input.end(), last = cur, found, prev; - DEBUG(stderr, "MATCHES:\n"); while (regex_search(cur, ende, match, regex_string)) { prev = cur; @@ -196,11 +309,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), input.size()); - for (auto x : tokenized) { + if (unlikely(debug)) + for (auto x : tokenized) { - cerr << x << endl; + cerr << x << endl; - } + } for (auto token : tokenized) { @@ -232,8 +346,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (c < e) { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(c, e)); } @@ -248,8 +367,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (cur < ende) { - DEBUG(stderr, "REST!\n"); - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; vector tokenized{it, {}}; tokenized.erase( @@ -260,11 +377,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), input.size()); - for (auto x : tokenized) { + if (unlikely(debug)) + for (auto x : tokenized) { - cerr << x << endl; + cerr << x << endl; - } + } for (auto token : tokenized) { @@ -279,8 +397,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (p < f) { // there are items between search start and find - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(p, f)); } @@ -296,8 +419,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (c < e) { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + tokens.push_back(std::string(c, e)); } @@ -306,15 +434,18 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - DEBUG(stderr, "DUMPING TOKENS:\n"); - if (unlikely(debug)) + if (unlikely(debug)) { + + DEBUG(stderr, "DUMPING TOKENS:\n"); for (u32 i = 0; i < tokens.size(); ++i) { DEBUG(stderr, "%s ", tokens[i].c_str()); } - DEBUG(stderr, "---------------------------\n"); + DEBUG(stderr, "---------------------------\n"); + + } /* Now we transform the tokens into an ID list and saved that */ @@ -342,6 +473,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // save the token structure to the file mapping file_mapping[fn] = structure; s = structure; + ++valid_structures; // we are done! DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", -- cgit v1.2.3 From 10b82c72772f40f703119fc7cd1c9063500a6bbe Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 15 Jan 2023 18:17:28 +0100 Subject: fixes --- custom_mutators/autotokens/Makefile | 2 +- custom_mutators/autotokens/autotokens.cpp | 40 ++++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 12 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 1ee7f5c4..5dd52dee 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,7 +1,7 @@ all: autotokens.so autotokens.so: autotokens.cpp - $(CXX) -O3 -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o + $(CXX) -g -O3 $(CFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o clean: rm -f autotokens.so *~ core \ No newline at end of file diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 2fad8dd7..9fbdf52a 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -14,7 +14,7 @@ extern "C" { #include #include -#define AUTOTOKENS_DEBUG 1 +#define AUTOTOKENS_DEBUG 0 #define AUTOTOKENS_CHANGE_MIN 8 using namespace std; @@ -64,11 +64,13 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + u32 max_rand = 4; + for (i = 0; i < rounds; ++i) { u32 item, new_item; - switch (rand_below(afl_ptr, 4)) { + switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ case 0: /* fall through */ @@ -90,9 +92,19 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; /* ERASE - only if large enough */ case 3: - if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); } - --m_size; + if (m_size > 8) { + + m.erase(m.begin() + rand_below(afl_ptr, m_size)); + --m_size; + + } else { + + max_rand = 3; + + } + break; + // TODO: add full line insert splice, replace splace, delete } @@ -119,9 +131,16 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + if (unlikely(debug)) { + + DEBUG(stderr, "MUTATED to %u bytes:\n", mutated_size); + fwrite(output.data(), 1, mutated_size, stderr); + DEBUG(stderr, "\n---\n"); + + } + memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; - DEBUG(stderr, "MUTATED to %u bytes:\n%s\n---\n", mutated_size, mutated_out); return mutated_size; } @@ -292,11 +311,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (regex_search(cur, ende, match, regex_string)) { prev = cur; - found = match[1].first; - cur = match[1].second; - DEBUG(stderr, - "string \"%s\" found at start %lu offset %lu continue at %lu\n", - match[1].str().c_str(), prev - input.begin(), match.position(), + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), cur - input.begin()); if (prev < found) { // there are items between search start and find sregex_token_iterator it{prev, found, regex_whitespace, -1}; @@ -361,7 +379,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[1].length() > 0) { tokens.push_back(match[1]); } + if (match[0].length() > 0) { tokens.push_back(match[0]); } } -- cgit v1.2.3 From 4b915207c42f8100f306778f617d7003c3e2193f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 16 Jan 2023 17:05:04 +0100 Subject: autotokens - much better tokenizer --- custom_mutators/autotokens/autotokens.cpp | 307 +++++++++++++++++------------- 1 file changed, 179 insertions(+), 128 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 9fbdf52a..850692a1 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -28,22 +28,41 @@ typedef struct my_mutator { #define DEBUG \ if (unlikely(debug)) fprintf -static afl_state *afl_ptr; -static int debug = AUTOTOKENS_DEBUG; -static u32 current_id = 0; -static u32 valid_structures = 0; -static u32 extras_cnt = 0, a_extras_cnt = 0; +static afl_state *afl_ptr; +static int debug = AUTOTOKENS_DEBUG; +static u32 current_id; +static u32 valid_structures; +static u32 whitespace_ids; +static u32 extras_cnt, a_extras_cnt; +static u64 all_spaces, all_tabs, all_lf, all_ws; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); -static regex regex_comment_star("/\\*(.|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_string("\"(.*?)\"|'(.*?')", regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); +// static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); static vector *s; // the structure of the currently selected input +u32 good_whitespace_or_singleval() { + + u32 i = rand_below(afl_ptr, current_id); + if (id_to_token[i].size() == 1) { return i; } + i = rand_below(afl_ptr, all_ws); + if (i < all_spaces) { + + return 0; + + } else if (i < all_tabs) { + + return 1; + + } else + + return 2; // linefeed + +} + extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, u8 *add_buf, size_t add_buf_size, size_t max_size) { @@ -68,30 +87,76 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, for (i = 0; i < rounds; ++i) { - u32 item, new_item; - switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ case 0: /* fall through */ - case 1: - item = rand_below(afl_ptr, m_size); + case 1: { + + u32 pos = rand_below(afl_ptr, m_size); + u32 cur_item = m[pos], new_item; do { - new_item = 1 + rand_below(afl_ptr, current_id); + new_item = rand_below(afl_ptr, current_id); - } while (unlikely(new_item == m[item])); + } while (unlikely( - m[item] = new_item; + new_item == cur_item || + (whitespace_ids < new_item && whitespace_ids >= cur_item) || + (whitespace_ids >= new_item && whitespace_ids < cur_item))); + + DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); + m[pos] = new_item; break; - /* INSERT (+1 so we insert also after last place) */ - case 2: - new_item = 1 + rand_below(afl_ptr, current_id); - m.insert(m.begin() + rand_below(afl_ptr, m_size + 1), new_item); + + } + + /* INSERT (m_size +1 so we insert also after last place) */ + case 2: { + + u32 new_item; + do { + + new_item = rand_below(afl_ptr, current_id); + + } while (new_item >= whitespace_ids); + + u32 pos = rand_below(afl_ptr, m_size + 1); + m.insert(m.begin() + pos, new_item); ++m_size; + + // if we insert an identifier or string we might need whitespace + if (id_to_token[new_item].size() > 1) { + + // need to insert before? + + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { + + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; + + } + + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { + + // need to insert after? + + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; + + } + + } + break; + + } + /* ERASE - only if large enough */ - case 3: + case 3: { + if (m_size > 8) { m.erase(m.begin() + rand_below(afl_ptr, m_size)); @@ -105,6 +170,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; + } + // TODO: add full line insert splice, replace splace, delete } @@ -112,12 +179,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } string output; - u32 m_size_1 = m_size - 1; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; - if (likely(i < m_size_1)) { output += " "; } } @@ -183,9 +248,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (ok) { - ++current_id; token_to_id[(char *)ptr] = current_id; id_to_token[current_id] = (char *)ptr; + ++current_id; } @@ -212,9 +277,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (ok) { - ++current_id; token_to_id[(char *)ptr] = current_id; id_to_token[current_id] = (char *)ptr; + ++current_id; } @@ -257,7 +322,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string input; input.resize(len); rewind(fp); - fread(input.data(), input.size(), 1, fp); + fread((void *)input.data(), input.size(), 1, fp); fclose(fp); if (!afl_ptr->shm.cmplog_mode) { @@ -287,28 +352,34 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); - input = regex_replace(input, regex_comment_slash, "$2"); + // input = regex_replace(input, regex_comment_slash, "$2"); input = regex_replace(input, regex_comment_star, ""); DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), filename, input.c_str()); - /* - u32 spaces = count(input.begin(), input.end(), ' '); - u32 tabs = count(input.begin(), input.end(), '\t'); - u32 linefeeds = count(input.begin(), input.end(), '\n'); + u32 spaces = count(input.begin(), input.end(), ' '); + u32 tabs = count(input.begin(), input.end(), '\t'); + u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, linefeeds, ends_with_linefeed); - */ + all_spaces += spaces; + all_tabs += tabs; + all_lf += linefeeds; + all_ws = all_spaces + all_tabs + all_lf; // now extract all tokens vector tokens; smatch match; - string::const_iterator cur = input.begin(), ende = input.end(), last = cur, - found, prev; + string::const_iterator cur = input.begin(), ende = input.end(), found, prev; - while (regex_search(cur, ende, match, regex_string)) { + DEBUG(stderr, "START!\n"); + + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { prev = cur; found = match[0].first; @@ -316,62 +387,42 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", match[0].str().c_str(), prev - input.begin(), match.position(), cur - input.begin()); + if (prev < found) { // there are items between search start and find - sregex_token_iterator it{prev, found, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase( - std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { return s.size() == 0; }), - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + while (prev < found) { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); - if (unlikely(debug)) - for (auto x : tokenized) { + if (isspace(*prev)) { - cerr << x << endl; + auto start = prev; + while (isspace(*prev)) { - } + ++prev; - for (auto token : tokenized) { + } - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); - while (regex_search(c, e, m, regex_word)) { + } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { + auto start = prev; + while (isalnum(*prev) || *prev == '$' || *prev == '_' || + *prev == '.' || *prev == '/') { - // there are items between search start and find - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); - tokens.push_back(std::string(p, f)); + ++prev; } - DEBUG(stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); - - } - - if (c < e) { - - if (unlikely(debug)) { - - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); - } + } else { - tokens.push_back(std::string(c, e)); + tokens.push_back(std::string(prev, prev + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *prev); + ++prev; } @@ -383,68 +434,44 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (cur < ende) { - - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase( - std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { return s.size() == 0; }), - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + DEBUG(stderr, "AFTER all strings\n"); - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); - if (unlikely(debug)) - for (auto x : tokenized) { + if (cur < ende) { - cerr << x << endl; + while (cur < ende) { - } + if (isspace(*cur)) { - for (auto token : tokenized) { + auto start = cur; + while (isspace(*cur)) { - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; + ++cur; - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { + } - // there are items between search start and find - if (unlikely(debug)) { + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { - } + auto start = cur; + while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' || + *cur == '/') { - tokens.push_back(std::string(p, f)); + ++cur; } - DEBUG(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); - - } - - if (c < e) { - - if (unlikely(debug)) { + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); - - } + } else { - tokens.push_back(std::string(c, e)); + tokens.push_back(std::string(cur, cur + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *cur); + ++cur; } @@ -457,7 +484,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "DUMPING TOKENS:\n"); for (u32 i = 0; i < tokens.size(); ++i) { - DEBUG(stderr, "%s ", tokens[i].c_str()); + DEBUG(stderr, "%s", tokens[i].c_str()); } @@ -475,10 +502,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if ((id = token_to_id[tokens[i]]) == 0) { // First time we see this token, add it to the list - ++current_id; token_to_id[tokens[i]] = current_id; id_to_token[current_id] = tokens[i]; structure->push_back(current_id); + ++current_id; } else { @@ -529,6 +556,30 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { data->afl = afl_ptr = afl; + // set common whitespace tokens + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t"] = current_id; + id_to_token[current_id] = "\t"; + ++current_id; + token_to_id["\n"] = current_id; + id_to_token[current_id] = "\n"; + ++current_id; + token_to_id["\r\n"] = current_id; + id_to_token[current_id] = "\r\n"; + ++current_id; + token_to_id[" \n"] = current_id; + id_to_token[current_id] = " \n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t"] = current_id; + id_to_token[current_id] = "\t\t"; + ++current_id; + whitespace_ids = current_id; + return data; } -- cgit v1.2.3 From 33f41e3974348d3b0b71b3a30a6483bb0418068c Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 17 Jan 2023 09:52:35 +0100 Subject: autotokens: print stats at exit --- custom_mutators/autotokens/README | 7 ++++--- custom_mutators/autotokens/autotokens.cpp | 12 ++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 6849279e..0dcc6a3e 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -1,6 +1,6 @@ # autotokens -This implements an improved autotoken idea presented in +This implements an improved autotoken grammar fuzzing idea presented in [Token-Level Fuzzing][https://www.usenix.org/system/files/sec21-salls.pdf]. It is a grammar fuzzer without actually knowing the grammar. @@ -8,5 +8,6 @@ It is recommended to run with together in an instance with `CMPLOG`. If you have a dictionary (`-x`) this improves this custom grammar mutator. -If **not** run with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY`, -to concentrate on grammar bug classes. +If **not** running with `CMPLOG`, it is possible to set +`AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes. + diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 850692a1..d6b269fd 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -35,6 +35,7 @@ static u32 valid_structures; static u32 whitespace_ids; static u32 extras_cnt, a_extras_cnt; static u64 all_spaces, all_tabs, all_lf, all_ws; +static u64 all_structure_items; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; @@ -519,6 +520,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = structure; s = structure; ++valid_structures; + all_structure_items += structure->size(); // we are done! DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", @@ -586,6 +588,16 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { extern "C" void afl_custom_deinit(my_mutator_t *data) { + /* we use this to print statistics at exit :-) + needs to be stderr as stdout is filtered */ + + fprintf(stderr, + "\n\nAutotoken mutator statistics:\n" + " Number of all seen tokens: %lu\n" + " Number of input structures: %lu\n" + " Number of all items in structures: %lu\n\n", + current_id - 1, valid_structures, all_structure_items); + free(data); } -- cgit v1.2.3 From efe57c936880608a2de452340d63f262470d9fcd Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 17 Jan 2023 09:57:23 +0100 Subject: more whitespace --- custom_mutators/autotokens/autotokens.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index d6b269fd..5580512a 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -559,6 +559,8 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { data->afl = afl_ptr = afl; // set common whitespace tokens + // we deliberately do not put uncommon ones here to these will count as + // identifier tokens. token_to_id[" "] = current_id; id_to_token[current_id] = " "; ++current_id; @@ -580,6 +582,21 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { token_to_id["\t\t"] = current_id; id_to_token[current_id] = "\t\t"; ++current_id; + token_to_id["\n\n"] = current_id; + id_to_token[current_id] = "\n\n"; + ++current_id; + token_to_id["\r\n\r\n"] = current_id; + id_to_token[current_id] = "\r\n\r\n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t\t\t"] = current_id; + id_to_token[current_id] = "\t\t\t\t"; + ++current_id; + token_to_id["\n\n\n\n"] = current_id; + id_to_token[current_id] = "\n\n\n\n"; + ++current_id; whitespace_ids = current_id; return data; -- cgit v1.2.3 From a41fd5cc5c4a5073f38adf06270e2985c88da9d5 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 11:46:28 +0100 Subject: alternate tokenize, options --- custom_mutators/autotokens/README | 9 + custom_mutators/autotokens/autotokens.cpp | 432 ++++++++++++++++++++++++------ 2 files changed, 365 insertions(+), 76 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 0dcc6a3e..f6e9c753 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -11,3 +11,12 @@ If you have a dictionary (`-x`) this improves this custom grammar mutator. If **not** running with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes. +## Configuration via environment variables + +`AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items +`AUTOTOKENS_COMMENT` - what character or string starts a comment which will be + removed. Default: `/* ... */` +`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation + (experimental) +`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, + default is " " diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 5580512a..28ef91e2 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -15,7 +15,10 @@ extern "C" { #include #define AUTOTOKENS_DEBUG 0 +#define AUTOTOKENS_ONLY_FAV 0 +#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 +#define AUTOTOKENS_WHITESPACE " " using namespace std; @@ -30,6 +33,8 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; +static int only_fav = AUTOTOKENS_ONLY_FAV; +static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -39,9 +44,12 @@ static u64 all_structure_items; static unordered_map *> file_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -// static regex regex_comment_slash("(//.*)([\r\n]?)", regex::optimize); +static string whitespace = AUTOTOKENS_WHITESPACE; +static regex *regex_comment_custom; static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", regex::multiline | regex::optimize); +static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); static vector *s; // the structure of the currently selected input @@ -84,15 +92,15 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); - u32 max_rand = 4; + u32 max_rand = 7; for (i = 0; i < rounds; ++i) { switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ - case 0: /* fall through */ - case 1: { + case 0 ... 3: /* fall through */ + { u32 pos = rand_below(afl_ptr, m_size); u32 cur_item = m[pos], new_item; @@ -103,8 +111,9 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } while (unlikely( new_item == cur_item || - (whitespace_ids < new_item && whitespace_ids >= cur_item) || - (whitespace_ids >= new_item && whitespace_ids < cur_item))); + (!alternative_tokenize && + ((whitespace_ids < new_item && whitespace_ids >= cur_item) || + (whitespace_ids >= new_item && whitespace_ids < cur_item))))); DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; @@ -113,7 +122,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* INSERT (m_size +1 so we insert also after last place) */ - case 2: { + case 4 ... 5: { u32 new_item; do { @@ -126,26 +135,30 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + pos, new_item); ++m_size; - // if we insert an identifier or string we might need whitespace - if (id_to_token[new_item].size() > 1) { + if (likely(!alternative_tokenize)) { - // need to insert before? + // if we insert an identifier or string we might need whitespace + if (id_to_token[new_item].size() > 1) { - if (pos && m[pos - 1] >= whitespace_ids && - id_to_token[m[pos - 1]].size() > 1) { + // need to insert before? - m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { - } + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; + + } + + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { + // need to insert after? - // need to insert after? + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; + } } @@ -156,7 +169,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* ERASE - only if large enough */ - case 3: { + case 6: { if (m_size > 8) { @@ -165,7 +178,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 3; + max_rand = 6; } @@ -180,10 +193,16 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } string output; + u32 m_size_1 = m_size - 1; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; + if (unlikely(alternative_tokenize && i < m_size_1)) { + + output += whitespace; + + } } @@ -219,7 +238,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (likely(!debug)) { - if (afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) { + if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; return 0; @@ -353,8 +373,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); - // input = regex_replace(input, regex_comment_slash, "$2"); - input = regex_replace(input, regex_comment_star, ""); + if (regex_comment_custom) { + + input = regex_replace(input, *regex_comment_custom, "$2"); + + } else { + + input = regex_replace(input, regex_comment_star, ""); + + } DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), filename, input.c_str()); @@ -377,53 +404,105 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUG(stderr, "START!\n"); - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + if (likely(!alternative_tokenize)) { + + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { + + prev = cur; + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); + + if (prev < found) { // there are items between search start and find + while (prev < found) { - prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + if (isspace(*prev)) { - if (prev < found) { // there are items between search start and find - while (prev < found) { + auto start = prev; + while (isspace(*prev)) { - if (isspace(*prev)) { + ++prev; - auto start = prev; - while (isspace(*prev)) { + } + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); + + } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { + + auto start = prev; + while (isalnum(*prev) || *prev == '$' || *prev == '_' || + *prev == '.' || *prev == '/') { + + ++prev; + + } + + tokens.push_back(std::string(start, prev)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); + + } else { + + tokens.push_back(std::string(prev, prev + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *prev); ++prev; } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + } + + } + + if (match[0].length() > 0) { tokens.push_back(match[0]); } + + } + + DEBUG(stderr, "AFTER all strings\n"); + + if (cur < ende) { + + while (cur < ende) { + + if (isspace(*cur)) { + + auto start = cur; + while (isspace(*cur)) { + + ++cur; + + } + + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, tokens[tokens.size() - 1].c_str()); - } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { + } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { - auto start = prev; - while (isalnum(*prev) || *prev == '$' || *prev == '_' || - *prev == '.' || *prev == '/') { + auto start = cur; + while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' || + *cur == '/') { - ++prev; + ++cur; } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens.push_back(std::string(start, cur)); + DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(prev, prev + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *prev); - ++prev; + tokens.push_back(std::string(cur, cur + 1)); + DEBUG(stderr, "OTHER \"%c\"\n", *cur); + ++cur; } @@ -431,48 +510,227 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + } else { - } + // alternative tokenize - DEBUG(stderr, "AFTER all strings\n"); + while (regex_search(cur, ende, match, regex_string)) { - if (cur < ende) { + prev = cur; + found = match[0].first; + cur = match[0].second; + DEBUG(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + match[0].str().c_str(), prev - input.begin(), match.position(), + cur - input.begin()); + if (prev < found) { // there are items between search start and find + sregex_token_iterator it{prev, found, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { - while (cur < ende) { + return s.size() == 0; - if (isspace(*cur)) { + }), - auto start = cur; - while (isspace(*cur)) { + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - ++cur; + if (unlikely(debug)) { + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; + + } } - tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + for (auto token : tokenized) { - } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; - auto start = cur; - while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' || - *cur == '/') { + while (regex_search(c, e, m, regex_word)) { - ++cur; + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + while (p < f) { + + if (unlikely(debug)) { + + string foo(p, p + 1); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(p, p + 1)); + ++p; + + } + + /* + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", + foo.c_str()); tokens.push_back(std::string(p, f)); + */ + + } + + DEBUG( + stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + while (c < e) { + + if (unlikely(debug)) { + + string foo(c, c + 1); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(c, c + 1)); + ++c; + + } + + /* + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", + foo.c_str()); + + } + + tokens.push_back(std::string(c, e)); + */ + + } } - tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + } + + if (match[0].length() > 0) { tokens.push_back(match[0]); } - } else { + } + + if (cur < ende) { + + sregex_token_iterator it{cur, ende, regex_whitespace, -1}; + vector tokenized{it, {}}; + tokenized.erase( + std::remove_if(tokenized.begin(), tokenized.end(), + [](std::string const &s) { return s.size() == 0; }), + tokenized.end()); + tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + + if (unlikely(debug)) { + + DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), + input.size()); + for (auto x : tokenized) { + + cerr << x << endl; - tokens.push_back(std::string(cur, cur + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *cur); - ++cur; + } + + } + + for (auto token : tokenized) { + + string::const_iterator c = token.begin(), e = token.end(), f, p; + smatch m; + + while (regex_search(c, e, m, regex_word)) { + + p = c; + f = m[0].first; + c = m[0].second; + if (p < f) { + + // there are items between search start and find + while (p < f) { + + if (unlikely(debug)) { + + string foo(p, p + 1); + DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(p, p + 1)); + ++p; + + } + + /* + if (unlikely(debug)) { + + string foo(p, f); + DEBUG(stderr, "before string: \"%s\"\n", + foo.c_str()); + + } + + tokens.push_back(std::string(p, f)); + */ + + } + + DEBUG(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue at " + "%lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); + tokens.push_back(m[0].str()); + + } + + if (c < e) { + + while (c < e) { + + if (unlikely(debug)) { + + string foo(c, c + 1); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(c, c + 1)); + ++c; + + } + + /* + if (unlikely(debug)) { + + string foo(c, e); + DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + + } + + tokens.push_back(std::string(c, e)); + */ + + } } @@ -483,9 +741,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (unlikely(debug)) { DEBUG(stderr, "DUMPING TOKENS:\n"); + u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { DEBUG(stderr, "%s", tokens[i].c_str()); + if (unlikely(alternative_tokenize && i < size_1)) { + + DEBUG(stderr, "%s", whitespace.c_str()); + + } } @@ -556,6 +820,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } + if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } + if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } + if (getenv("AUTOTOKENS_WHITESPACE")) { + + whitespace = getenv("AUTOTOKENS_WHITESPACE"); + + } + + if (getenv("AUTOTOKENS_COMMENT")) { + + char buf[256]; + snprintf(buf, sizeof(buf), "(%s.*)([\r\n]?)", getenv("AUTOTOKENS_COMMENT")); + regex_comment_custom = new regex(buf, regex::optimize); + + } + data->afl = afl_ptr = afl; // set common whitespace tokens -- cgit v1.2.3 From 70f4b456faf8e361f6e0a34246708380c94cb36e Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 13:58:27 +0100 Subject: fixes --- custom_mutators/autotokens/Makefile | 7 ++++++- custom_mutators/autotokens/autotokens.cpp | 24 +++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 5dd52dee..8af63635 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,7 +1,12 @@ +ifdef debug + CFLAGS += "-fsanitize=address -Wall" + CXX := clang++ +endif + all: autotokens.so autotokens.so: autotokens.cpp $(CXX) -g -O3 $(CFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o clean: - rm -f autotokens.so *~ core \ No newline at end of file + rm -f autotokens.so *~ core diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 28ef91e2..57c35846 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -48,7 +48,7 @@ static string whitespace = AUTOTOKENS_WHITESPACE; static regex *regex_comment_custom; static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", regex::multiline | regex::optimize); -static regex regex_word("[A-Za-z0-9_$]+", regex::optimize); +static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize); static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); static vector *s; // the structure of the currently selected input @@ -514,7 +514,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // alternative tokenize - while (regex_search(cur, ende, match, regex_string)) { + while (regex_search(cur, ende, match, regex_string, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { prev = cur; found = match[0].first; @@ -553,7 +556,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word)) { + while (regex_search(c, e, m, regex_word, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { p = c; f = m[0].first; @@ -658,7 +664,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word)) { + while (regex_search(c, e, m, regex_word, + regex_constants::match_any | + regex_constants::match_not_null | + regex_constants::match_continuous)) { p = c; f = m[0].first; @@ -820,6 +829,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } + if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_WHITESPACE")) { @@ -890,9 +900,9 @@ extern "C" void afl_custom_deinit(my_mutator_t *data) { fprintf(stderr, "\n\nAutotoken mutator statistics:\n" - " Number of all seen tokens: %lu\n" - " Number of input structures: %lu\n" - " Number of all items in structures: %lu\n\n", + " Number of all seen tokens: %u\n" + " Number of input structures: %u\n" + " Number of all items in structures: %llu\n\n", current_id - 1, valid_structures, all_structure_items); free(data); -- cgit v1.2.3 From 0db662db7b433a08b01de7f5a989843450919b88 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 14:21:44 +0100 Subject: fix --- custom_mutators/autotokens/autotokens.cpp | 78 ++++++++++++++++--------------- 1 file changed, 41 insertions(+), 37 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 57c35846..94f86413 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -851,43 +851,47 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { // set common whitespace tokens // we deliberately do not put uncommon ones here to these will count as // identifier tokens. - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t"] = current_id; - id_to_token[current_id] = "\t"; - ++current_id; - token_to_id["\n"] = current_id; - id_to_token[current_id] = "\n"; - ++current_id; - token_to_id["\r\n"] = current_id; - id_to_token[current_id] = "\r\n"; - ++current_id; - token_to_id[" \n"] = current_id; - id_to_token[current_id] = " \n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t"] = current_id; - id_to_token[current_id] = "\t\t"; - ++current_id; - token_to_id["\n\n"] = current_id; - id_to_token[current_id] = "\n\n"; - ++current_id; - token_to_id["\r\n\r\n"] = current_id; - id_to_token[current_id] = "\r\n\r\n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t\t\t"] = current_id; - id_to_token[current_id] = "\t\t\t\t"; - ++current_id; - token_to_id["\n\n\n\n"] = current_id; - id_to_token[current_id] = "\n\n\n\n"; - ++current_id; - whitespace_ids = current_id; + if (!alternative_tokenize) { + + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t"] = current_id; + id_to_token[current_id] = "\t"; + ++current_id; + token_to_id["\n"] = current_id; + id_to_token[current_id] = "\n"; + ++current_id; + token_to_id["\r\n"] = current_id; + id_to_token[current_id] = "\r\n"; + ++current_id; + token_to_id[" \n"] = current_id; + id_to_token[current_id] = " \n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t"] = current_id; + id_to_token[current_id] = "\t\t"; + ++current_id; + token_to_id["\n\n"] = current_id; + id_to_token[current_id] = "\n\n"; + ++current_id; + token_to_id["\r\n\r\n"] = current_id; + id_to_token[current_id] = "\r\n\r\n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t\t\t"] = current_id; + id_to_token[current_id] = "\t\t\t\t"; + ++current_id; + token_to_id["\n\n\n\n"] = current_id; + id_to_token[current_id] = "\n\n\n\n"; + ++current_id; + whitespace_ids = current_id; + + } return data; -- cgit v1.2.3 From 22f757a169d3da3081306c0f861ef99a509073fe Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 14:33:06 +0100 Subject: fix --- custom_mutators/autotokens/autotokens.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 94f86413..7aecb010 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -129,7 +129,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, new_item = rand_below(afl_ptr, current_id); - } while (new_item >= whitespace_ids); + } while (!alternative_tokenize && new_item >= whitespace_ids); u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); -- cgit v1.2.3 From 14d8eb9e40a6329abcb2f153174b543349c68c13 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 22:17:14 +0100 Subject: autotoken: splicing; splice_optout --- custom_mutators/autotokens/Makefile | 6 +- custom_mutators/autotokens/autotokens.cpp | 103 +++++++++++++++++++++++++++--- 2 files changed, 99 insertions(+), 10 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 8af63635..ab1da4b6 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,5 +1,9 @@ ifdef debug - CFLAGS += "-fsanitize=address -Wall" + CFLAGS += -fsanitize=address -Wall + CXX := clang++ +endif +ifdef DEBUG + CFLAGS += -fsanitize=address -Wall CXX := clang++ endif diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 7aecb010..c9ec4352 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -19,6 +19,13 @@ extern "C" { #define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_WHITESPACE " " +#define AUTOTOKENS_SIZE_MIN 8 +#define AUTOTOKENS_SPLICE_MIN 4 +#define AUTOTOKENS_SPLICE_MAX 64 + +#if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN + #error SPLICE_MIN must be lower than SIZE_MIN +#endif using namespace std; @@ -42,6 +49,7 @@ static u32 extras_cnt, a_extras_cnt; static u64 all_spaces, all_tabs, all_lf, all_ws; static u64 all_structure_items; static unordered_map *> file_mapping; +static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; @@ -76,6 +84,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, u8 *add_buf, size_t add_buf_size, size_t max_size) { + (void)(data); + if (s == NULL) { *out_buf = NULL; @@ -92,14 +102,14 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); - u32 max_rand = 7; + u32 max_rand = 14; for (i = 0; i < rounds; ++i) { switch (rand_below(afl_ptr, max_rand)) { /* CHANGE */ - case 0 ... 3: /* fall through */ + case 0 ... 7: /* fall through */ { u32 pos = rand_below(afl_ptr, m_size); @@ -122,18 +132,19 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* INSERT (m_size +1 so we insert also after last place) */ - case 4 ... 5: { + case 8 ... 9: { u32 new_item; do { new_item = rand_below(afl_ptr, current_id); - } while (!alternative_tokenize && new_item >= whitespace_ids); + } while (unlikely(!alternative_tokenize && new_item >= whitespace_ids)); u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; + DEBUG(stderr, "INS: %u at %u\n", new_item, pos); if (likely(!alternative_tokenize)) { @@ -168,8 +179,63 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + /* SPLICING */ + case 10 ... 11: { + + u32 strategy = rand_below(afl_ptr, 4), dst_off, n; + auto src = id_mapping[rand_below(afl_ptr, valid_structures)]; + u32 src_size = src->size(); + u32 src_off = rand_below(afl_ptr, src_size - AUTOTOKENS_SPLICE_MIN); + u32 rand_r = 1 + MAX(AUTOTOKENS_SPLICE_MIN, + MIN(AUTOTOKENS_SPLICE_MAX, src_size - src_off)); + + switch (strategy) { + + // insert + case 0: { + + dst_off = rand_below(afl_ptr, m_size); + n = AUTOTOKENS_SPLICE_MIN + + rand_below(afl_ptr, MIN(AUTOTOKENS_SPLICE_MAX, + rand_r - AUTOTOKENS_SPLICE_MIN)); + m.insert(m.begin() + dst_off, src->begin() + src_off, + src->begin() + src_off + n); + m_size += n; + DEBUG(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + break; + + } + + // overwrite + default: { + + dst_off = rand_below(afl_ptr, m_size - AUTOTOKENS_SPLICE_MIN); + n = AUTOTOKENS_SPLICE_MIN + + rand_below( + afl_ptr, + MIN(AUTOTOKENS_SPLICE_MAX - AUTOTOKENS_SPLICE_MIN, + MIN(m_size - dst_off - AUTOTOKENS_SPLICE_MIN, + src_size - src_off - AUTOTOKENS_SPLICE_MIN))); + + for (u32 i = 0; i < n; ++i) { + + m[dst_off + i] = (*src)[src_off + i]; + + } + + DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + break; + + } + + } + + break; + + } + /* ERASE - only if large enough */ - case 6: { + case 12 ... 13: { if (m_size > 8) { @@ -178,7 +244,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 6; + max_rand = 12; } @@ -236,12 +302,15 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { + (void)(data); + if (likely(!debug)) { if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; + DEBUG(stderr, "cmplog not ascii or only_fav and not favorite\n"); return 0; } @@ -334,8 +403,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again - DEBUG(stderr, "Too short (%lu) %s\n", len, filename); s = NULL; + DEBUG(stderr, "Too short (%lu) %s\n", len, filename); return 0; } @@ -362,8 +431,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (((len * AFL_TXT_MIN_PERCENT) / 100) > valid_chars) { file_mapping[fn] = NULL; - DEBUG(stderr, "Not text (%lu) %s\n", len, filename); s = NULL; + DEBUG(stderr, "Not text (%lu) %s\n", len, filename); return 0; } @@ -766,6 +835,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } + if (tokens.size() < AUTOTOKENS_SIZE_MIN) { + + file_mapping[fn] = NULL; + s = NULL; + DEBUG(stderr, "too few tokens\n"); + return 0; + + } + /* Now we transform the tokens into an ID list and saved that */ structure = new vector(); @@ -791,8 +869,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // save the token structure to the file mapping file_mapping[fn] = structure; - s = structure; + id_mapping[valid_structures] = structure; ++valid_structures; + s = structure; all_structure_items += structure->size(); // we are done! @@ -897,6 +976,12 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } +extern "C" void afl_custom_splice_optout(my_mutator_t *data) { + + (void)(data); + +} + extern "C" void afl_custom_deinit(my_mutator_t *data) { /* we use this to print statistics at exit :-) -- cgit v1.2.3 From 17752465e6b3c70fd0104fae7bb1f84c1cb8bb66 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 22:31:55 +0100 Subject: nit --- custom_mutators/autotokens/README | 2 ++ custom_mutators/autotokens/TODO | 8 +------- custom_mutators/autotokens/autotokens.cpp | 7 ++----- 3 files changed, 5 insertions(+), 12 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index f6e9c753..f82dcd98 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -11,6 +11,8 @@ If you have a dictionary (`-x`) this improves this custom grammar mutator. If **not** running with `CMPLOG`, it is possible to set `AFL_CUSTOM_MUTATOR_ONLY` to concentrate on grammar bug classes. +Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! + ## Configuration via environment variables `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 2e5e384f..95b79373 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,12 +1,6 @@ -whitespace belassen oder notieren? MAYBE -0=space 1=tab 2=linefeed - cmplog: only add tokens that were found to fit? create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) -splice insert, splice overwrite -(linefeed, semicolon) - - +splicing -> check if whitespace/token is needed \ No newline at end of file diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index c9ec4352..5e683455 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -217,11 +217,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, MIN(m_size - dst_off - AUTOTOKENS_SPLICE_MIN, src_size - src_off - AUTOTOKENS_SPLICE_MIN))); - for (u32 i = 0; i < n; ++i) { - - m[dst_off + i] = (*src)[src_off + i]; - - } + copy(src->begin() + src_off, src->begin() + src_off + n, + m.begin() + dst_off); DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; -- cgit v1.2.3 From 45567791c66e128361a7533481b385497ced881f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 23:09:16 +0100 Subject: autotokens: define disable splice --- custom_mutators/autotokens/autotokens.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 5e683455..f6ab9ddd 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -22,6 +22,7 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 +#define AUTOTOKENS_SPLICE_DISABLE 0 #if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN #error SPLICE_MIN must be lower than SIZE_MIN @@ -102,7 +103,13 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, afl_ptr->havoc_div / 256)); // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); - u32 max_rand = 14; +#if AUTOTOKENS_SPLICE_DISABLE == 1 + #define AUTOTOKENS_MUT_MAX 12 +#else + #define AUTOTOKENS_MUT_MAX 14 +#endif + + u32 max_rand = AUTOTOKENS_MUT_MAX; for (i = 0; i < rounds; ++i) { @@ -179,6 +186,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } +#if AUTOTOKENS_SPLICE_DISABLE != 1 /* SPLICING */ case 10 ... 11: { @@ -230,9 +238,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; } +#endif /* ERASE - only if large enough */ - case 12 ... 13: { + default: { if (m_size > 8) { @@ -241,7 +250,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } else { - max_rand = 12; + max_rand = AUTOTOKENS_MUT_MAX - 2; } -- cgit v1.2.3 From 151a8facae2048a26c65658dfec507233a677fb0 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 18 Jan 2023 23:16:18 +0100 Subject: autotokens: stats --- custom_mutators/autotokens/autotokens.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f6ab9ddd..4f3289c9 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -22,7 +22,9 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 -#define AUTOTOKENS_SPLICE_DISABLE 0 +#ifndef AUTOTOKENS_SPLICE_DISABLE + #define AUTOTOKENS_SPLICE_DISABLE 0 +#endif #if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN #error SPLICE_MIN must be lower than SIZE_MIN @@ -49,6 +51,7 @@ static u32 whitespace_ids; static u32 extras_cnt, a_extras_cnt; static u64 all_spaces, all_tabs, all_lf, all_ws; static u64 all_structure_items; +static u64 fuzz_count; static unordered_map *> file_mapping; static unordered_map *> id_mapping; static unordered_map token_to_id; @@ -238,6 +241,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, break; } + #endif /* ERASE - only if large enough */ @@ -298,6 +302,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; + ++fuzz_count; return mutated_size; } @@ -997,8 +1002,9 @@ extern "C" void afl_custom_deinit(my_mutator_t *data) { "\n\nAutotoken mutator statistics:\n" " Number of all seen tokens: %u\n" " Number of input structures: %u\n" - " Number of all items in structures: %llu\n\n", - current_id - 1, valid_structures, all_structure_items); + " Number of all items in structures: %llu\n" + " Number of total fuzzes: %llu\n\n", + current_id - 1, valid_structures, all_structure_items, fuzz_count); free(data); -- cgit v1.2.3 From eeca3a0b2939c605497e9b3a615ee4a466f4a3f2 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 11:52:19 +0100 Subject: lots of fixes --- custom_mutators/autotokens/TODO | 2 +- custom_mutators/autotokens/autotokens.cpp | 424 +++++++++++++++++++----------- 2 files changed, 271 insertions(+), 155 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 95b79373..2e39511c 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -3,4 +3,4 @@ cmplog: only add tokens that were found to fit? create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) -splicing -> check if whitespace/token is needed \ No newline at end of file +splicing -> check if whitespace/token is needed diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 4f3289c9..102bea0f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -38,8 +38,10 @@ typedef struct my_mutator { } my_mutator_t; -#define DEBUG \ +#undef DEBUGF +#define DEBUGF \ if (unlikely(debug)) fprintf +#define IFDEBUG if (unlikely(debug)) static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; @@ -57,12 +59,12 @@ static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; +static string output; static regex *regex_comment_custom; -static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", - regex::multiline | regex::optimize); -static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); -static regex regex_whitespace(R"([ \t]+)", regex::optimize); -static regex regex_string("\"[[:print:]]*?\"|'[[:print:]]*?'", regex::optimize); +static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", + regex::multiline | regex::optimize); +static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); +static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input u32 good_whitespace_or_singleval() { @@ -104,7 +106,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, MAX(AUTOTOKENS_CHANGE_MIN, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256)); - // DEBUG(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); + // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 #define AUTOTOKENS_MUT_MAX 12 @@ -112,7 +114,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, #define AUTOTOKENS_MUT_MAX 14 #endif - u32 max_rand = AUTOTOKENS_MUT_MAX; + u32 max_rand = AUTOTOKENS_MUT_MAX, new_item, pos; for (i = 0; i < rounds; ++i) { @@ -122,8 +124,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, case 0 ... 7: /* fall through */ { - u32 pos = rand_below(afl_ptr, m_size); - u32 cur_item = m[pos], new_item; + pos = rand_below(afl_ptr, m_size); + u32 cur_item = m[pos]; do { new_item = rand_below(afl_ptr, current_id); @@ -135,7 +137,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ((whitespace_ids < new_item && whitespace_ids >= cur_item) || (whitespace_ids >= new_item && whitespace_ids < cur_item))))); - DEBUG(stderr, "MUT: %u -> %u\n", cur_item, new_item); + DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; break; @@ -144,7 +146,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* INSERT (m_size +1 so we insert also after last place) */ case 8 ... 9: { - u32 new_item; do { new_item = rand_below(afl_ptr, current_id); @@ -154,7 +155,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; - DEBUG(stderr, "INS: %u at %u\n", new_item, pos); + DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); if (likely(!alternative_tokenize)) { @@ -212,7 +213,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + dst_off, src->begin() + src_off, src->begin() + src_off + n); m_size += n; - DEBUG(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + break; } @@ -231,13 +233,36 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, copy(src->begin() + src_off, src->begin() + src_off + n, m.begin() + dst_off); - DEBUG(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; } } + if (likely(!alternative_tokenize)) { + + // do we need a whitespace/token at the beginning? + if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && + id_to_token[m[dst_off]].size() > 1) { + + m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); + ++m_size; + + } + + // do we need a whitespace/token at the end? + if (dst_off + n < m_size && + id_to_token[m[dst_off + n - 1]].size() > 1 && + id_to_token[m[dst_off + n]].size() > 1) { + + m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); + ++m_size; + + } + + } + break; } @@ -249,11 +274,32 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, if (m_size > 8) { - m.erase(m.begin() + rand_below(afl_ptr, m_size)); - --m_size; + do { + + pos = rand_below(afl_ptr, m_size); + + } while (unlikely(pos < whitespace_ids)); + + // if what we delete will result in a missing whitespace/token, + // instead of deleting we switch the item to a whitespace or token. + if (likely(!alternative_tokenize) && pos && pos < m_size && + id_to_token[m[pos - 1]].size() > 1 && + id_to_token[m[pos + 1]].size() > 1) { + + m[pos] = good_whitespace_or_singleval(); + + } else { + + m.erase(m.begin() + pos); + --m_size; + + } } else { + // if the data is already too small do not try to make it smaller + // again this run. + max_rand = AUTOTOKENS_MUT_MAX - 2; } @@ -262,14 +308,12 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - // TODO: add full line insert splice, replace splace, delete - } } - string output; - u32 m_size_1 = m_size - 1; + u32 m_size_1 = m_size - 1; + output = ""; for (i = 0; i < m_size; ++i) { @@ -282,31 +326,108 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - u32 mutated_size = output.size(); - u8 *mutated_out = (u8 *)afl_realloc((void **)out_buf, mutated_size); + u32 mutated_size = (u32)output.size(); + u8 *mutated_out = (u8 *)output.data(); - if (unlikely(!mutated_out)) { + if (unlikely(mutated_size > max_size)) { mutated_size = max_size; } - *out_buf = NULL; - return 0; - - } - - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "MUTATED to %u bytes:\n", mutated_size); + DEBUGF(stderr, "MUTATED to %u bytes:\n", mutated_size); fwrite(output.data(), 1, mutated_size, stderr); - DEBUG(stderr, "\n---\n"); + DEBUGF(stderr, "\n---\n"); } - memcpy(mutated_out, output.data(), mutated_size); *out_buf = mutated_out; ++fuzz_count; return mutated_size; } +/* I get f*cking stack overflow using C++ regex with a regex of + "\"[[:print:]]*?\"" if this matches a long string even with regex::optimize + enabled :-( */ +u8 my_search_string(string::const_iterator cur, string::const_iterator ende, + string::const_iterator *match_begin, + string::const_iterator *match_end) { + + string::const_iterator start = cur, found_begin; + u8 quote_type = 0; + + while (cur < ende) { + + switch (*cur) { + + case '"': { + + if (cur == start || *(cur - 1) != '\\') { + + if (!quote_type) { + + found_begin = cur; + quote_type = 1; + + } else if (quote_type == 1) { + + *match_begin = found_begin; + *match_end = cur + 1; + return 1; + + } + + } + + break; + + } + + case '\'': { + + if (cur == start || *(cur - 1) != '\\') { + + if (!quote_type) { + + found_begin = cur; + quote_type = 2; + + } else if (quote_type == 2) { + + *match_begin = found_begin; + *match_end = cur + 1; + return 1; + + } + + } + + break; + + } + + case '\n': + case '\r': + case 0: { + + quote_type = 0; + break; + + } + + default: + if (unlikely(quote_type && !isprint(*cur))) { quote_type = 0; } + break; + + } + + ++cur; + + } + + return 0; + +} + /* We are not using afl_custom_queue_new_entry() because not every corpus entry will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ @@ -321,7 +442,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, (only_fav && !afl_ptr->queue_cur->favored)) { s = NULL; - DEBUG(stderr, "cmplog not ascii or only_fav and not favorite\n"); + DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); return 0; } @@ -356,7 +477,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++extras_cnt; - DEBUG(stderr, "Added from dictionary: \"%s\"\n", ptr); + DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr); } @@ -385,7 +506,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++a_extras_cnt; - DEBUG(stderr, "Added from auto dictionary: \"%s\"\n", ptr); + DEBUGF(stderr, "Added from auto dictionary: \"%s\"\n", ptr); } @@ -415,7 +536,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; - DEBUG(stderr, "Too short (%lu) %s\n", len, filename); + DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 0; } @@ -443,14 +564,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; - DEBUG(stderr, "Not text (%lu) %s\n", len, filename); + DEBUGF(stderr, "Not text (%lu) %s\n", len, filename); return 0; } } - // DEBUG(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", + // DEBUGF(stderr, "Read %lu bytes for %s\nBefore comment trim:\n%s\n", // input.size(), filename, input.c_str()); if (regex_comment_custom) { @@ -463,15 +584,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - DEBUG(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), - filename, input.c_str()); + DEBUGF(stderr, "After replace %lu bytes for %s\n%s\n", input.size(), + filename, input.c_str()); u32 spaces = count(input.begin(), input.end(), ' '); u32 tabs = count(input.begin(), input.end(), '\t'); u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; - DEBUG(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, - linefeeds, ends_with_linefeed); + DEBUGF(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, + linefeeds, ends_with_linefeed); all_spaces += spaces; all_tabs += tabs; all_lf += linefeeds; @@ -479,25 +600,28 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // now extract all tokens vector tokens; - smatch match; - string::const_iterator cur = input.begin(), ende = input.end(), found, prev; + string::const_iterator cur = input.begin(), ende = input.end(), found, prev, + match_begin, match_end; - DEBUG(stderr, "START!\n"); + DEBUGF(stderr, "START!\n"); if (likely(!alternative_tokenize)) { - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + found = match_begin; + cur = match_end; + + IFDEBUG { + + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); + + } if (prev < found) { // there are items between search start and find while (prev < found) { @@ -512,8 +636,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { @@ -525,14 +649,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - tokens.push_back(std::string(start, prev)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); + tokens.push_back(string(start, prev)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, + tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(prev, prev + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *prev); + tokens.push_back(string(prev, prev + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *prev); ++prev; } @@ -541,11 +665,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + tokens.push_back(string(match_begin, match_end)); + DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); } - DEBUG(stderr, "AFTER all strings\n"); + DEBUGF(stderr, "AFTER all strings\n"); if (cur < ende) { @@ -561,8 +686,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { @@ -575,13 +700,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } tokens.push_back(std::string(start, cur)); - DEBUG(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, - tokens[tokens.size() - 1].c_str()); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); } else { tokens.push_back(std::string(cur, cur + 1)); - DEBUG(stderr, "OTHER \"%c\"\n", *cur); + DEBUGF(stderr, "OTHER \"%c\"\n", *cur); ++cur; } @@ -593,19 +718,21 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } else { // alternative tokenize - - while (regex_search(cur, ende, match, regex_string, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { prev = cur; - found = match[0].first; - cur = match[0].second; - DEBUG(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - match[0].str().c_str(), prev - input.begin(), match.position(), - cur - input.begin()); + found = match_begin; + cur = match_end; + IFDEBUG { + + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); + + } + if (prev < found) { // there are items between search start and find sregex_token_iterator it{prev, found, regex_whitespace, -1}; vector tokenized{it, {}}; @@ -619,10 +746,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, tokenized.end()); tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); + DEBUGF(stderr, "tokens1: %lu input size: %lu\n", tokenized.size(), + input.size()); for (auto x : tokenized) { cerr << x << endl; @@ -636,10 +763,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (regex_search(c, e, m, regex_word)) { p = c; f = m[0].first; @@ -649,10 +773,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // there are items between search start and find while (p < f) { - if (unlikely(debug)) { + IFDEBUG { string foo(p, p + 1); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); } @@ -661,20 +785,21 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", - foo.c_str()); tokens.push_back(std::string(p, f)); - */ + IFDEBUG { + + string foo(p, f); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); + tokens.push_back(std::string(p, f)); + + } } - DEBUG( - stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); + DEBUGF(stderr, + "SUBstring \"%s\" found at start %lu offset %lu continue " + "at %lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); tokens.push_back(m[0].str()); } @@ -683,10 +808,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (c < e) { - if (unlikely(debug)) { + IFDEBUG { string foo(c, c + 1); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); } @@ -695,17 +820,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", - foo.c_str()); + string foo(c, e); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(c, e)); - */ + tokens.push_back(std::string(c, e)); } @@ -713,7 +835,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (match[0].length() > 0) { tokens.push_back(match[0]); } + tokens.push_back(string(match_begin, match_end)); } @@ -727,10 +849,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, tokenized.end()); tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "tokens: %lu input size: %lu\n", tokenized.size(), - input.size()); + DEBUGF(stderr, "tokens2: %lu input size: %lu\n", tokenized.size(), + input.size()); for (auto x : tokenized) { cerr << x << endl; @@ -744,10 +866,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string::const_iterator c = token.begin(), e = token.end(), f, p; smatch m; - while (regex_search(c, e, m, regex_word, - regex_constants::match_any | - regex_constants::match_not_null | - regex_constants::match_continuous)) { + while (regex_search(c, e, m, regex_word)) { p = c; f = m[0].first; @@ -757,10 +876,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, // there are items between search start and find while (p < f) { - if (unlikely(debug)) { + IFDEBUG { string foo(p, p + 1); - DEBUG(stderr, "before string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); } @@ -769,25 +888,22 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(p, f); - DEBUG(stderr, "before string: \"%s\"\n", - foo.c_str()); + string foo(p, f); + DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(p, f)); - */ + tokens.push_back(std::string(p, f)); } - DEBUG(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue at " - "%lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); + DEBUGF(stderr, + "SUB2string \"%s\" found at start %lu offset %lu continue " + "at %lu\n", + m[0].str().c_str(), p - input.begin(), m.position(), + c - token.begin()); tokens.push_back(m[0].str()); } @@ -796,10 +912,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, while (c < e) { - if (unlikely(debug)) { + IFDEBUG { string foo(c, c + 1); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); } @@ -808,16 +924,14 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - /* - if (unlikely(debug)) { + IFDEBUG { - string foo(c, e); - DEBUG(stderr, "after string: \"%s\"\n", foo.c_str()); + string foo(c, e); + DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - } + } - tokens.push_back(std::string(c, e)); - */ + tokens.push_back(std::string(c, e)); } @@ -827,22 +941,22 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - if (unlikely(debug)) { + IFDEBUG { - DEBUG(stderr, "DUMPING TOKENS:\n"); + DEBUGF(stderr, "DUMPING TOKENS:\n"); u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { - DEBUG(stderr, "%s", tokens[i].c_str()); + DEBUGF(stderr, "%s", tokens[i].c_str()); if (unlikely(alternative_tokenize && i < size_1)) { - DEBUG(stderr, "%s", whitespace.c_str()); + DEBUGF(stderr, "%s", whitespace.c_str()); } } - DEBUG(stderr, "---------------------------\n"); + DEBUGF(stderr, "---------------------------\n"); } @@ -850,7 +964,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; - DEBUG(stderr, "too few tokens\n"); + DEBUGF(stderr, "too few tokens\n"); return 0; } @@ -886,21 +1000,23 @@ extern "C" unsigned char afl_custom_queue_get(void *data, all_structure_items += structure->size(); // we are done! - DEBUG(stderr, "DONE! We have %lu tokens in the structure\n", - structure->size()); + DEBUGF(stderr, "DONE! We have %lu tokens in the structure\n", + structure->size()); + + } - } else { + else { if (entry->second == NULL) { - DEBUG(stderr, "Skipping %s\n", filename); + DEBUGF(stderr, "Skipping %s\n", filename); s = NULL; return 0; } s = entry->second; - DEBUG(stderr, "OK %s\n", filename); + DEBUGF(stderr, "OK %s\n", filename); } -- cgit v1.2.3 From afff6f642c77e4986fdb8a4e9799c1a52e80ce32 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 13:41:48 +0100 Subject: optimize --- custom_mutators/autotokens/autotokens.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 102bea0f..149ae430 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -109,9 +109,9 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 - #define AUTOTOKENS_MUT_MAX 12 + #define AUTOTOKENS_MUT_MAX 18 #else - #define AUTOTOKENS_MUT_MAX 14 + #define AUTOTOKENS_MUT_MAX 27 #endif u32 max_rand = AUTOTOKENS_MUT_MAX, new_item, pos; @@ -120,8 +120,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, switch (rand_below(afl_ptr, max_rand)) { - /* CHANGE */ - case 0 ... 7: /* fall through */ + /* CHANGE/MUTATE single item */ + case 0 ... 9: { pos = rand_below(afl_ptr, m_size); @@ -144,7 +144,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } /* INSERT (m_size +1 so we insert also after last place) */ - case 8 ... 9: { + case 10 ... 13: { do { @@ -192,7 +192,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, #if AUTOTOKENS_SPLICE_DISABLE != 1 /* SPLICING */ - case 10 ... 11: { + case 14 ... 22: { u32 strategy = rand_below(afl_ptr, 4), dst_off, n; auto src = id_mapping[rand_below(afl_ptr, valid_structures)]; @@ -278,11 +278,11 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, pos = rand_below(afl_ptr, m_size); - } while (unlikely(pos < whitespace_ids)); + } while (unlikely(m[pos] < whitespace_ids)); // if what we delete will result in a missing whitespace/token, // instead of deleting we switch the item to a whitespace or token. - if (likely(!alternative_tokenize) && pos && pos < m_size && + if (likely(!alternative_tokenize) && pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 && id_to_token[m[pos + 1]].size() > 1) { @@ -300,7 +300,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, // if the data is already too small do not try to make it smaller // again this run. - max_rand = AUTOTOKENS_MUT_MAX - 2; + max_rand -= 4; } @@ -734,6 +734,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } if (prev < found) { // there are items between search start and find + sregex_token_iterator it{prev, found, regex_whitespace, -1}; vector tokenized{it, {}}; tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(), -- cgit v1.2.3 From 86d3c65559209ce12452e18daf96946222c19b46 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 15:59:57 +0100 Subject: nit --- custom_mutators/autotokens/autotokens.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 149ae430..f4b96c7b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -121,8 +121,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, switch (rand_below(afl_ptr, max_rand)) { /* CHANGE/MUTATE single item */ - case 0 ... 9: - { + case 0 ... 9: { pos = rand_below(afl_ptr, m_size); u32 cur_item = m[pos]; @@ -438,8 +437,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (likely(!debug)) { - if ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || - (only_fav && !afl_ptr->queue_cur->favored)) { + if (unlikely(!afl_ptr->custom_only) && + ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored))) { s = NULL; DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); -- cgit v1.2.3 From 628b4b60021a0d62a2eccddca4fe321c9d57c663 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 17:24:56 +0100 Subject: enhance examples --- custom_mutators/README.md | 10 ++++++++++ custom_mutators/examples/custom_send.c | 9 ++++++++- custom_mutators/examples/example.c | 4 +++- custom_mutators/examples/post_library_gif.so.c | 4 ++-- 4 files changed, 23 insertions(+), 4 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/README.md b/custom_mutators/README.md index 0289e150..8d01856f 100644 --- a/custom_mutators/README.md +++ b/custom_mutators/README.md @@ -11,6 +11,16 @@ The `./examples` folder contains examples for custom mutators in python and C. In `./rust`, you will find rust bindings, including a simple example in `./rust/example` and an example for structured fuzzing, based on lain, in`./rust/example_lain`. +## The AFL++ grammar agnostic grammar mutator + +In `./autotokens` you find a token-level fuzzer that does not need to know +anything about the grammar of an input as long as it is in ascii and allows +whitespace. +It is very fast and effective. + +If you are looking for an example of how to effectively create a custom +mutator take a look at this one. + ## The AFL++ Grammar Mutator If you use git to clone AFL++, then the following will incorporate our diff --git a/custom_mutators/examples/custom_send.c b/custom_mutators/examples/custom_send.c index ffea927e..7de72819 100644 --- a/custom_mutators/examples/custom_send.c +++ b/custom_mutators/examples/custom_send.c @@ -1,7 +1,14 @@ +// +// This is an example on how to use afl_custom_send +// It writes each mutated data set to /tmp/foo +// You can modify this to send to IPC, shared memory, etc. +// // cc -O3 -fPIC -shared -g -o custom_send.so -I../../include custom_send.c // cd ../.. // afl-cc -o test-instr test-instr.c -// afl-fuzz -i in -o out -- ./test-instr -f /tmp/foo +// AFL_CUSTOM_MUTATOR_LIBRARY=custom_mutators/examples/custom_send.so \ +// afl-fuzz -i in -o out -- ./test-instr -f /tmp/foo +// #include "custom_mutator_helpers.h" diff --git a/custom_mutators/examples/example.c b/custom_mutators/examples/example.c index 3f299508..e680ec8e 100644 --- a/custom_mutators/examples/example.c +++ b/custom_mutators/examples/example.c @@ -6,7 +6,7 @@ Dominik Maier */ -// You need to use -I /path/to/AFLplusplus/include +// You need to use -I/path/to/AFLplusplus/include -I. #include "custom_mutator_helpers.h" #include @@ -118,6 +118,8 @@ size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, } + if (max_size > mutated_size) { mutated_size = max_size; } + *out_buf = mutated_out; return mutated_size; diff --git a/custom_mutators/examples/post_library_gif.so.c b/custom_mutators/examples/post_library_gif.so.c index 9cd224f4..3cb018a6 100644 --- a/custom_mutators/examples/post_library_gif.so.c +++ b/custom_mutators/examples/post_library_gif.so.c @@ -129,8 +129,8 @@ size_t afl_custom_post_process(post_state_t *data, unsigned char *in_buf, /* Allocate memory for new buffer, reusing previous allocation if possible. Note we have to use afl-fuzz's own realloc! - Note that you should only do this if you need to grow the buffer, - otherwise work with in_buf, and assign it to *out_buf instead. */ + We use afl_realloc because it is effective. + You can also work within in_buf, and assign it to *out_buf. */ *out_buf = afl_realloc(out_buf, len); -- cgit v1.2.3 From 67cfe4f6d4a03c596a5c3e1aa97d64d79263746a Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 19 Jan 2023 22:24:24 +0100 Subject: nits --- custom_mutators/autotokens/autotokens.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f4b96c7b..16ee8109 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -544,7 +544,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string input; input.resize(len); rewind(fp); - fread((void *)input.data(), input.size(), 1, fp); + + if (fread((void *)input.data(), 1, len, fp) != len) { + + s = NULL; + DEBUGF(stderr, "Too short read %s\n", len, filename); + return 0; + + } + fclose(fp); if (!afl_ptr->shm.cmplog_mode) { -- cgit v1.2.3 From bd2cb4cd1c2f07d5406875771cd41fb9a6e1f84d Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 20 Jan 2023 12:22:29 +0100 Subject: more default tokens --- custom_mutators/autotokens/autotokens.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 16ee8109..f9b5bd2e 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1105,6 +1105,12 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { id_to_token[current_id] = "\n\n\n\n"; ++current_id; whitespace_ids = current_id; + token_to_id["\""] = current_id; + id_to_token[current_id] = "\""; + ++current_id; + token_to_id["'"] = current_id; + id_to_token[current_id] = "'"; + ++current_id; } -- cgit v1.2.3 From 47f35d29ac53ed1cdb87f65591b62947a7965060 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 27 Jan 2023 14:32:18 +0100 Subject: fix --- custom_mutators/autotokens/autotokens.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f9b5bd2e..4a2cc08f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -61,8 +61,10 @@ static unordered_map id_to_token; static string whitespace = AUTOTOKENS_WHITESPACE; static string output; static regex *regex_comment_custom; -static regex regex_comment_star("/\\*([:print:]|\n)*?\\*/", - regex::multiline | regex::optimize); +// multiline requires g++-11 libs :( +static regex regex_comment_star( + "/\\*([:print:]|\n)*?\\*/", + regex_constants::optimize /* | regex_constants::multiline */); static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input @@ -548,7 +550,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (fread((void *)input.data(), 1, len, fp) != len) { s = NULL; - DEBUGF(stderr, "Too short read %s\n", len, filename); + DEBUGF(stderr, "Too short read %s\n", filename); return 0; } -- cgit v1.2.3 From b5d8d4c866137a8a6bd55225b0eaf723123c46c9 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 29 Jan 2023 10:07:33 +0100 Subject: comment --- custom_mutators/autotokens/autotokens.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 4a2cc08f..0a010f0b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1,3 +1,9 @@ +/* + token level fuzzing custom mutator for afl++ + (c) by Marc Heuse + License: Apache 2.0 +*/ + extern "C" { #include "afl-fuzz.h" -- cgit v1.2.3 From 91ccbf3f68ab9e6e4bc277f86c3efed666867132 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 1 Feb 2023 17:16:51 +0100 Subject: fix --- custom_mutators/autotokens/autotokens.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 0a010f0b..548e1be9 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -451,7 +451,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, s = NULL; DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); - return 0; + return 1; } @@ -532,7 +532,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (!fp) { s = NULL; - return 0; + return 1; } // should not happen @@ -545,7 +545,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); - return 0; + return 1; } @@ -557,7 +557,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, s = NULL; DEBUGF(stderr, "Too short read %s\n", filename); - return 0; + return 1; } @@ -581,7 +581,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; DEBUGF(stderr, "Not text (%lu) %s\n", len, filename); - return 0; + return 1; } @@ -982,7 +982,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, file_mapping[fn] = NULL; s = NULL; DEBUGF(stderr, "too few tokens\n"); - return 0; + return 1; } @@ -1020,15 +1020,13 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "DONE! We have %lu tokens in the structure\n", structure->size()); - } - - else { + } else { if (entry->second == NULL) { DEBUGF(stderr, "Skipping %s\n", filename); s = NULL; - return 0; + return 1; } -- cgit v1.2.3 From e1434bcfcd8c13de838559fd7b797d1a3cd5a672 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 14:34:47 +0100 Subject: more autotoken options --- custom_mutators/autotokens/TODO | 17 ++++++++++++ custom_mutators/autotokens/autotokens.cpp | 45 ++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 2e39511c..3cae3060 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -4,3 +4,20 @@ create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) splicing -> check if whitespace/token is needed + +whitespace/token check only AFTER mutation + +analyse welche einen DICT haben, und welche davon rein ascii + +corpus analyse: + + libxml + - hardbuzz + - sqlite + - libpcap +min len, max len, % wenn 95/98/99/100 ascii + +funktion und env für menge an mutationen + +env für menge an per mutation run + +only add inital dictionary, not furher finds, e.g. cmplog diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 548e1be9..a0125851 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -28,6 +28,9 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 +#define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 +// 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog +#define AUTOTOKENS_LEARN_DICT 2 #ifndef AUTOTOKENS_SPLICE_DISABLE #define AUTOTOKENS_SPLICE_DISABLE 0 #endif @@ -53,6 +56,8 @@ static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static int only_fav = AUTOTOKENS_ONLY_FAV; static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; +static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; +static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -94,6 +99,22 @@ u32 good_whitespace_or_singleval() { } +extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf, + size_t buf_size) { + + if (s == NULL) return 0; + + u32 shift = unlikely(afl_ptr->custom_only) ? 7 : 8; + u32 stage_max = (u32)((HAVOC_CYCLES * afl_ptr->queue_cur->perf_score) / + afl_ptr->havoc_div) >> + shift; + if (fuzz_count_shift) { stage_max >>= (u32)fuzz_count_shift; }; + DEBUGF(stderr, "fuzz count: %u\n", stage_max); + + return stage_max; + +} + extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, u8 *add_buf, size_t add_buf_size, size_t max_size) { @@ -441,6 +462,7 @@ u8 my_search_string(string::const_iterator cur, string::const_iterator ende, extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { + static int learn_state; (void)(data); if (likely(!debug)) { @@ -458,7 +480,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } // check if there are new dictionary entries and add them to the tokens - if (valid_structures) { + if (valid_structures && learn_state < learn_dictionary_tokens) { + + if (unlikely(!learn_state)) { learn_state = 1; } while (extras_cnt < afl_ptr->extras_cnt) { @@ -1053,6 +1077,25 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } + + if (getenv("AUTOTOKENS_LEARN_DICT")) { + + learn_dictionary_tokens = atoi(getenv("AUTOTOKENS_LEARN_DICT")); + if (learn_dictionary_tokens < 0 || learn_dictionary_tokens > 2) { + + learn_dictionary_tokens = 2; + + } + + } + + if (getenv("AUTOTOKENS_FUZZ_COUNT_SHIFT")) { + + fuzz_count_shift = atoi(getenv("AUTOTOKENS_FUZZ_COUNT_SHIFT")); + if (fuzz_count_shift < 0 || fuzz_count_shift > 16) { fuzz_count_shift = 0; } + + } + if (getenv("AUTOTOKENS_WHITESPACE")) { whitespace = getenv("AUTOTOKENS_WHITESPACE"); -- cgit v1.2.3 From ec87abda93d68f489f26ed2a2ae75b4f1e26d0bb Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 14:37:28 +0100 Subject: readme --- custom_mutators/autotokens/README | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index f82dcd98..86e7c9b3 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -18,6 +18,12 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! `AUTOTOKENS_ONLY_FAV` - only use this mutator on favorite queue items `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be removed. Default: `/* ... */` +`AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the number of fuzzing performed, shifting + the value by this number set, e.g. 1. +`AUTOTOKENS_LEARN_DICT` - learn from dictionaries? + 0 = none + 1 = only -x or autodict + 2 = -x, autodict and `CMPLOG` `AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation (experimental) `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, -- cgit v1.2.3 From 90f61552f794fc0fae5dc2585f81f31d32db1e89 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 4 Feb 2023 15:39:03 +0100 Subject: changes --- custom_mutators/autotokens/TODO | 9 ++++----- custom_mutators/autotokens/autotokens.cpp | 12 ++++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 3cae3060..528dff1f 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,5 +1,3 @@ -cmplog: only add tokens that were found to fit? - create from thin air if no good seed after a cycle and dict large enough? (static u32 no_of_struct_inputs;) @@ -16,8 +14,9 @@ corpus analyse: - libpcap min len, max len, % wenn 95/98/99/100 ascii -funktion und env für menge an mutationen - env für menge an per mutation run -only add inital dictionary, not furher finds, e.g. cmplog +AFL_TXT_MAX_LEN 65535 +AFL_TXT_MIN_LEN 16 +AFL_TXT_MIN_PERCENT=99 + diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index a0125851..46a347f8 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -34,6 +34,9 @@ extern "C" { #ifndef AUTOTOKENS_SPLICE_DISABLE #define AUTOTOKENS_SPLICE_DISABLE 0 #endif +#ifndef AFL_TXT_MAX_LEN + #define AFL_TXT_MAX_LEN 65535 +#endif #if AUTOTOKENS_SPLICE_MIN >= AUTOTOKENS_SIZE_MIN #error SPLICE_MIN must be lower than SIZE_MIN @@ -571,6 +574,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 1; + } else + if (len > AFL_TXT_MAX_LEN) { + + fclose(fp); + file_mapping[fn] = structure; // NULL ptr so we don't read the file again + s = NULL; + DEBUGF(stderr, "Too long (%lu) %s\n", len, filename); + return 1; + } string input; -- cgit v1.2.3 From f99656e22bffb4bfac8e201ad973a1ea5a6abaa0 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 5 Feb 2023 13:15:06 +0100 Subject: create from thin air, max mutation --- custom_mutators/autotokens/autotokens.cpp | 97 +++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 19 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 46a347f8..f1263600 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -24,10 +24,12 @@ extern "C" { #define AUTOTOKENS_ONLY_FAV 0 #define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 +#define AUTOTOKENS_CHANGE_MAX 64 #define AUTOTOKENS_WHITESPACE " " #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 +#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog #define AUTOTOKENS_LEARN_DICT 2 @@ -61,6 +63,7 @@ static int only_fav = AUTOTOKENS_ONLY_FAV; static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; +static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -83,7 +86,18 @@ static regex regex_word("[A-Za-z0-9_$.-]+", regex::optimize); static regex regex_whitespace(R"([ \t]+)", regex::optimize); static vector *s; // the structure of the currently selected input -u32 good_whitespace_or_singleval() { +// FUNCTIONS + +/* This function is called once after everything is set up but before + any fuzzing attempt has been performed. + This is called in afl_custom_queue_get() */ +static void first_run(void *data) { + + (void)(data); + +} + +static u32 good_whitespace_or_singleval() { u32 i = rand_below(afl_ptr, current_id); if (id_to_token[i].size() == 1) { return i; } @@ -105,6 +119,8 @@ u32 good_whitespace_or_singleval() { extern "C" u32 afl_custom_fuzz_count(void *data, const u8 *buf, size_t buf_size) { + (void)(data); + if (s == NULL) return 0; u32 shift = unlikely(afl_ptr->custom_only) ? 7 : 8; @@ -135,9 +151,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 i, m_size = (u32)m.size(); u32 rounds = - MAX(AUTOTOKENS_CHANGE_MIN, - MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * - afl_ptr->havoc_div / 256)); + MIN(AUTOTOKENS_CHANGE_MAX, + MAX(AUTOTOKENS_CHANGE_MIN, + MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * + afl_ptr->havoc_div / 256))); // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); #if AUTOTOKENS_SPLICE_DISABLE == 1 @@ -379,9 +396,10 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* I get f*cking stack overflow using C++ regex with a regex of "\"[[:print:]]*?\"" if this matches a long string even with regex::optimize enabled :-( */ -u8 my_search_string(string::const_iterator cur, string::const_iterator ende, - string::const_iterator *match_begin, - string::const_iterator *match_end) { +static u8 my_search_string(string::const_iterator cur, + string::const_iterator ende, + string::const_iterator *match_begin, + string::const_iterator *match_end) { string::const_iterator start = cur, found_begin; u8 quote_type = 0; @@ -460,25 +478,30 @@ u8 my_search_string(string::const_iterator cur, string::const_iterator ende, } /* We are not using afl_custom_queue_new_entry() because not every corpus entry - will be necessarily fuzzed. so we use afl_custom_queue_get() instead */ + will be necessarily fuzzed with this custom mutator. + So we use afl_custom_queue_get() instead. */ extern "C" unsigned char afl_custom_queue_get(void *data, const unsigned char *filename) { - static int learn_state; + static int learn_state = 0; + static int is_first_run = 1; (void)(data); - if (likely(!debug)) { + if (unlikely(is_first_run)) { - if (unlikely(!afl_ptr->custom_only) && - ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || - (only_fav && !afl_ptr->queue_cur->favored))) { + is_first_run = 0; + first_run(data); - s = NULL; - DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); - return 1; + } - } + if (unlikely(!afl_ptr->custom_only) && !create_from_thin_air && + ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored))) { + + s = NULL; + DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); + return 1; } @@ -551,6 +574,42 @@ extern "C" unsigned char afl_custom_queue_get(void *data, string fn = (char *)filename; auto entry = file_mapping.find(fn); + // if there is only one active queue item at start and it is very small + // the we create once a structure randomly. + if (unlikely(create_from_thin_air)) { + + if (current_id > whitespace_ids + 6 && afl_ptr->active_items == 1 && + afl_ptr->queue_cur->len < AFL_TXT_MIN_LEN) { + + DEBUGF(stderr, "Creating an entry from thin air...\n"); + structure = new vector(); + u32 item, prev, cnt = current_id >> 1; + structure->reserve(cnt + 4); + for (u32 i = 0; i < cnt; i++) { + + item = rand_below(afl_ptr, current_id); + if (i && id_to_token[item].length() > 1 && + id_to_token[prev].length() > 1) { + + structure->push_back(good_whitespace_or_singleval()); + + } + + structure->push_back(item); + prev = item; + + } + + file_mapping[fn] = structure; + s = structure; + return 1; + + } + + create_from_thin_air = 0; + + } + if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! @@ -574,8 +633,7 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); return 1; - } else - if (len > AFL_TXT_MAX_LEN) { + } else if (len > AFL_TXT_MAX_LEN) { fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again @@ -1088,6 +1146,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } + if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; } if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_LEARN_DICT")) { -- cgit v1.2.3 From e6120282556e4df79c01236849e5f6f225b8e428 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 5 Feb 2023 14:19:10 +0100 Subject: dict fix --- custom_mutators/autotokens/README | 3 +++ custom_mutators/autotokens/autotokens.cpp | 22 +++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 86e7c9b3..d8613232 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -24,6 +24,9 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! 0 = none 1 = only -x or autodict 2 = -x, autodict and `CMPLOG` +`AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and + a dictionary loaded then create one initial + structure based on the dictionary. `AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation (experimental) `AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index f1263600..d3ae7e9c 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -29,7 +29,7 @@ extern "C" { #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 -#define AUTOTOKENS_CREATE_FROM_THIN_AIR 1 +#define AUTOTOKENS_CREATE_FROM_THIN_AIR 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog #define AUTOTOKENS_LEARN_DICT 2 @@ -506,14 +506,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } // check if there are new dictionary entries and add them to the tokens - if (valid_structures && learn_state < learn_dictionary_tokens) { + if (likely(valid_structures || create_from_thin_air) && + learn_state < learn_dictionary_tokens) { if (unlikely(!learn_state)) { learn_state = 1; } while (extras_cnt < afl_ptr->extras_cnt) { u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; - u8 *ptr = afl_ptr->extras[extras_cnt].data; + u8 *buf, *ptr = afl_ptr->extras[extras_cnt].data; for (u32 i = 0; i < l; ++i) { @@ -528,14 +529,17 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (ok) { - token_to_id[(char *)ptr] = current_id; - id_to_token[current_id] = (char *)ptr; + buf = (u8 *)malloc(afl_ptr->extras[extras_cnt].len + 1); + memcpy(buf, afl_ptr->extras[extras_cnt].data, + afl_ptr->extras[extras_cnt].len); + buf[afl_ptr->extras[extras_cnt].len] = 0; + token_to_id[(char *)buf] = current_id; + id_to_token[current_id] = (char *)buf; ++current_id; } ++extras_cnt; - DEBUGF(stderr, "Added from dictionary: \"%s\"\n", ptr); } @@ -600,8 +604,12 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - file_mapping[fn] = structure; s = structure; + file_mapping[fn] = structure; + id_mapping[valid_structures] = structure; + ++valid_structures; + all_structure_items += structure->size(); + return 1; } -- cgit v1.2.3 From 8a2547073c500fcd637a7b276b7a38313bb70b5f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 6 Feb 2023 08:51:20 +0100 Subject: more options --- custom_mutators/autotokens/README | 2 ++ custom_mutators/autotokens/TODO | 4 +++- custom_mutators/autotokens/autotokens.cpp | 26 ++++++++++++++++++++++---- 3 files changed, 27 insertions(+), 5 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index d8613232..e9c48662 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -24,6 +24,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! 0 = none 1 = only -x or autodict 2 = -x, autodict and `CMPLOG` +`AUTOTOKENS_CHANGE_MIN` - minimum number of mutations (1-256, default 8) +`AUTOTOKENS_CHANGE_MAX` - maximum number of mutations (1-4096, default 64) `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and a dictionary loaded then create one initial structure based on the dictionary. diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 528dff1f..496bfd45 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -9,7 +9,6 @@ analyse welche einen DICT haben, und welche davon rein ascii corpus analyse: + libxml - - hardbuzz - sqlite - libpcap min len, max len, % wenn 95/98/99/100 ascii @@ -20,3 +19,6 @@ AFL_TXT_MAX_LEN 65535 AFL_TXT_MIN_LEN 16 AFL_TXT_MIN_PERCENT=99 +-> KEIN FAV! + +change_min/_max werte diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index d3ae7e9c..ee35c68b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -32,7 +32,7 @@ extern "C" { #define AUTOTOKENS_CREATE_FROM_THIN_AIR 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog -#define AUTOTOKENS_LEARN_DICT 2 +#define AUTOTOKENS_LEARN_DICT 1 #ifndef AUTOTOKENS_SPLICE_DISABLE #define AUTOTOKENS_SPLICE_DISABLE 0 #endif @@ -64,6 +64,8 @@ static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR; +static int change_min = AUTOTOKENS_CHANGE_MIN; +static int change_max = AUTOTOKENS_CHANGE_MAX; static u32 current_id; static u32 valid_structures; static u32 whitespace_ids; @@ -151,8 +153,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 i, m_size = (u32)m.size(); u32 rounds = - MIN(AUTOTOKENS_CHANGE_MAX, - MAX(AUTOTOKENS_CHANGE_MIN, + MIN(change_max, + MAX(change_min, MIN(m_size >> 3, HAVOC_CYCLES * afl_ptr->queue_cur->perf_score * afl_ptr->havoc_div / 256))); // DEBUGF(stderr, "structure size: %lu, rounds: %u \n", m.size(), rounds); @@ -1162,7 +1164,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { learn_dictionary_tokens = atoi(getenv("AUTOTOKENS_LEARN_DICT")); if (learn_dictionary_tokens < 0 || learn_dictionary_tokens > 2) { - learn_dictionary_tokens = 2; + learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; } @@ -1175,6 +1177,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } + if (getenv("AUTOTOKENS_CHANGE_MIN")) { + + change_min = atoi(getenv("AUTOTOKENS_CHANGE_MIN")); + if (change_min < 1 || change_min > 256) { change_min = AUTOTOKENS_CHANGE_MIN; } + + } + + if (getenv("AUTOTOKENS_CHANGE_MAX")) { + + change_max = atoi(getenv("AUTOTOKENS_CHANGE_MAX")); + if (change_max < 1 || change_max > 4096) { change_max = AUTOTOKENS_CHANGE_MAX; } + + } + + if (change_max < change_min) { change_max = change_min + 1; } + if (getenv("AUTOTOKENS_WHITESPACE")) { whitespace = getenv("AUTOTOKENS_WHITESPACE"); -- cgit v1.2.3 From 7eaef449a1e92999c89df23ab474b3be3da595f8 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 08:14:04 +0100 Subject: remove ALTERNATIVE_TOKENIZE --- custom_mutators/autotokens/autotokens.cpp | 522 ++++++++---------------------- 1 file changed, 136 insertions(+), 386 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index ee35c68b..a027ac2b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -22,7 +22,6 @@ extern "C" { #define AUTOTOKENS_DEBUG 0 #define AUTOTOKENS_ONLY_FAV 0 -#define AUTOTOKENS_ALTERNATIVE_TOKENIZE 0 #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MAX 64 #define AUTOTOKENS_WHITESPACE " " @@ -60,7 +59,6 @@ typedef struct my_mutator { static afl_state *afl_ptr; static int debug = AUTOTOKENS_DEBUG; static int only_fav = AUTOTOKENS_ONLY_FAV; -static int alternative_tokenize = AUTOTOKENS_ALTERNATIVE_TOKENIZE; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; static int fuzz_count_shift = AUTOTOKENS_FUZZ_COUNT_SHIFT; static int create_from_thin_air = AUTOTOKENS_CREATE_FROM_THIN_AIR; @@ -142,7 +140,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, (void)(data); - if (s == NULL) { + if (unlikely(s == NULL)) { *out_buf = NULL; return 0; @@ -183,9 +181,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } while (unlikely( new_item == cur_item || - (!alternative_tokenize && - ((whitespace_ids < new_item && whitespace_ids >= cur_item) || - (whitespace_ids >= new_item && whitespace_ids < cur_item))))); + ((whitespace_ids < new_item && whitespace_ids >= cur_item) || + (whitespace_ids >= new_item && whitespace_ids < cur_item)))); DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; @@ -200,37 +197,33 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, new_item = rand_below(afl_ptr, current_id); - } while (unlikely(!alternative_tokenize && new_item >= whitespace_ids)); + } while (unlikely(new_item >= whitespace_ids)); u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); - if (likely(!alternative_tokenize)) { + // if we insert an identifier or string we might need whitespace + if (id_to_token[new_item].size() > 1) { - // if we insert an identifier or string we might need whitespace - if (id_to_token[new_item].size() > 1) { + // need to insert before? - // need to insert before? + if (pos && m[pos - 1] >= whitespace_ids && + id_to_token[m[pos - 1]].size() > 1) { - if (pos && m[pos - 1] >= whitespace_ids && - id_to_token[m[pos - 1]].size() > 1) { - - m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; - - } + m.insert(m.begin() + pos, good_whitespace_or_singleval()); + ++m_size; - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { + } - // need to insert after? + if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && + id_to_token[m[pos + 1]].size() > 1) { - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; + // need to insert after? - } + m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); + ++m_size; } @@ -290,26 +283,22 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - if (likely(!alternative_tokenize)) { - - // do we need a whitespace/token at the beginning? - if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && - id_to_token[m[dst_off]].size() > 1) { + // do we need a whitespace/token at the beginning? + if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && + id_to_token[m[dst_off]].size() > 1) { - m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); - ++m_size; + m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); + ++m_size; - } + } - // do we need a whitespace/token at the end? - if (dst_off + n < m_size && - id_to_token[m[dst_off + n - 1]].size() > 1 && - id_to_token[m[dst_off + n]].size() > 1) { + // do we need a whitespace/token at the end? + if (dst_off + n < m_size && + id_to_token[m[dst_off + n - 1]].size() > 1 && + id_to_token[m[dst_off + n]].size() > 1) { - m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); - ++m_size; - - } + m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); + ++m_size; } @@ -332,8 +321,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, // if what we delete will result in a missing whitespace/token, // instead of deleting we switch the item to a whitespace or token. - if (likely(!alternative_tokenize) && pos && pos + 1 < m_size && - id_to_token[m[pos - 1]].size() > 1 && + if (pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 && id_to_token[m[pos + 1]].size() > 1) { m[pos] = good_whitespace_or_singleval(); @@ -362,17 +350,11 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - u32 m_size_1 = m_size - 1; output = ""; for (i = 0; i < m_size; ++i) { output += id_to_token[m[i]]; - if (unlikely(alternative_tokenize && i < m_size_1)) { - - output += whitespace; - - } } @@ -725,109 +707,57 @@ extern "C" unsigned char afl_custom_queue_get(void *data, DEBUGF(stderr, "START!\n"); - if (likely(!alternative_tokenize)) { + while (my_search_string(cur, ende, &match_begin, &match_end)) { - while (my_search_string(cur, ende, &match_begin, &match_end)) { + prev = cur; + found = match_begin; + cur = match_end; - prev = cur; - found = match_begin; - cur = match_end; + IFDEBUG { - IFDEBUG { - - string foo(match_begin, match_end); - DEBUGF(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - foo.c_str(), prev - input.begin(), found - prev, - cur - input.begin()); - - } - - if (prev < found) { // there are items between search start and find - while (prev < found) { - - if (isspace(*prev)) { - - auto start = prev; - while (isspace(*prev)) { - - ++prev; - - } - - tokens.push_back(std::string(start, prev)); - DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); - - } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { - - auto start = prev; - while (isalnum(*prev) || *prev == '$' || *prev == '_' || - *prev == '.' || *prev == '/') { - - ++prev; - - } - - tokens.push_back(string(start, prev)); - DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, - tokens[tokens.size() - 1].c_str()); - - } else { - - tokens.push_back(string(prev, prev + 1)); - DEBUGF(stderr, "OTHER \"%c\"\n", *prev); - ++prev; - - } - - } - - } - - tokens.push_back(string(match_begin, match_end)); - DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); + string foo(match_begin, match_end); + DEBUGF(stderr, + "string %s found at start %lu offset %lu continue at %lu\n", + foo.c_str(), prev - input.begin(), found - prev, + cur - input.begin()); } - DEBUGF(stderr, "AFTER all strings\n"); - - if (cur < ende) { + if (prev < found) { // there are items between search start and find + while (prev < found) { - while (cur < ende) { + if (isspace(*prev)) { - if (isspace(*cur)) { + auto start = prev; + while (isspace(*prev)) { - auto start = cur; - while (isspace(*cur)) { - - ++cur; + ++prev; } - tokens.push_back(std::string(start, cur)); - DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens.push_back(std::string(start, prev)); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", prev - start, tokens[tokens.size() - 1].c_str()); - } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { + } else if (isalnum(*prev) || *prev == '$' || *prev == '_') { - auto start = cur; - while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' || - *cur == '/') { + auto start = prev; + while (isalnum(*prev) || *prev == '$' || *prev == '_' || + *prev == '.' || *prev == '/') { - ++cur; + ++prev; } - tokens.push_back(std::string(start, cur)); - DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens.push_back(string(start, prev)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", prev - start, tokens[tokens.size() - 1].c_str()); } else { - tokens.push_back(std::string(cur, cur + 1)); - DEBUGF(stderr, "OTHER \"%c\"\n", *cur); - ++cur; + tokens.push_back(string(prev, prev + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *prev); + ++prev; } @@ -835,226 +765,49 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - } else { - - // alternative tokenize - while (my_search_string(cur, ende, &match_begin, &match_end)) { - - prev = cur; - found = match_begin; - cur = match_end; - IFDEBUG { - - string foo(match_begin, match_end); - DEBUGF(stderr, - "string %s found at start %lu offset %lu continue at %lu\n", - foo.c_str(), prev - input.begin(), found - prev, - cur - input.begin()); - - } - - if (prev < found) { // there are items between search start and find - - sregex_token_iterator it{prev, found, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase(std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { - - return s.size() == 0; - - }), - - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); - - IFDEBUG { - - DEBUGF(stderr, "tokens1: %lu input size: %lu\n", tokenized.size(), - input.size()); - for (auto x : tokenized) { - - cerr << x << endl; - - } - - } - - for (auto token : tokenized) { - - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; - - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { - - // there are items between search start and find - while (p < f) { - - IFDEBUG { + tokens.push_back(string(match_begin, match_end)); + DEBUGF(stderr, "TOK: %s\n", tokens[tokens.size() - 1].c_str()); - string foo(p, p + 1); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, p + 1)); - ++p; - - } - - IFDEBUG { - - string foo(p, f); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - tokens.push_back(std::string(p, f)); - - } - - } - - DEBUGF(stderr, - "SUBstring \"%s\" found at start %lu offset %lu continue " - "at %lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); - - } - - if (c < e) { - - while (c < e) { - - IFDEBUG { - - string foo(c, c + 1); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, c + 1)); - ++c; - - } - - IFDEBUG { - - string foo(c, e); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, e)); - - } - - } - - } - - tokens.push_back(string(match_begin, match_end)); + } - } + DEBUGF(stderr, "AFTER all strings\n"); - if (cur < ende) { + if (cur < ende) { - sregex_token_iterator it{cur, ende, regex_whitespace, -1}; - vector tokenized{it, {}}; - tokenized.erase( - std::remove_if(tokenized.begin(), tokenized.end(), - [](std::string const &s) { return s.size() == 0; }), - tokenized.end()); - tokens.reserve(tokens.size() + tokenized.size() * 2 + 1); + while (cur < ende) { - IFDEBUG { + if (isspace(*cur)) { - DEBUGF(stderr, "tokens2: %lu input size: %lu\n", tokenized.size(), - input.size()); - for (auto x : tokenized) { + auto start = cur; + while (isspace(*cur)) { - cerr << x << endl; + ++cur; } - } - - for (auto token : tokenized) { - - string::const_iterator c = token.begin(), e = token.end(), f, p; - smatch m; - - while (regex_search(c, e, m, regex_word)) { - - p = c; - f = m[0].first; - c = m[0].second; - if (p < f) { + tokens.push_back(std::string(start, cur)); + DEBUGF(stderr, "WHITESPACE %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - // there are items between search start and find - while (p < f) { + } else if (isalnum(*cur) || *cur == '$' || *cur == '_') { - IFDEBUG { + auto start = cur; + while (isalnum(*cur) || *cur == '$' || *cur == '_' || *cur == '.' || + *cur == '/') { - string foo(p, p + 1); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, p + 1)); - ++p; - - } - - IFDEBUG { - - string foo(p, f); - DEBUGF(stderr, "before string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(p, f)); - - } - - DEBUGF(stderr, - "SUB2string \"%s\" found at start %lu offset %lu continue " - "at %lu\n", - m[0].str().c_str(), p - input.begin(), m.position(), - c - token.begin()); - tokens.push_back(m[0].str()); + ++cur; } - if (c < e) { - - while (c < e) { - - IFDEBUG { - - string foo(c, c + 1); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } - - tokens.push_back(std::string(c, c + 1)); - ++c; - - } + tokens.push_back(std::string(start, cur)); + DEBUGF(stderr, "IDENTIFIER %ld \"%s\"\n", cur - start, + tokens[tokens.size() - 1].c_str()); - IFDEBUG { - - string foo(c, e); - DEBUGF(stderr, "after string: \"%s\"\n", foo.c_str()); - - } + } else { - tokens.push_back(std::string(c, e)); - - } + tokens.push_back(std::string(cur, cur + 1)); + DEBUGF(stderr, "OTHER \"%c\"\n", *cur); + ++cur; } @@ -1065,15 +818,9 @@ extern "C" unsigned char afl_custom_queue_get(void *data, IFDEBUG { DEBUGF(stderr, "DUMPING TOKENS:\n"); - u32 size_1 = tokens.size() - 1; for (u32 i = 0; i < tokens.size(); ++i) { DEBUGF(stderr, "%s", tokens[i].c_str()); - if (unlikely(alternative_tokenize && i < size_1)) { - - DEBUGF(stderr, "%s", whitespace.c_str()); - - } } @@ -1157,7 +904,6 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; } - if (getenv("AUTOTOKENS_ALTERNATIVE_TOKENIZE")) { alternative_tokenize = 1; } if (getenv("AUTOTOKENS_LEARN_DICT")) { @@ -1180,14 +926,22 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (getenv("AUTOTOKENS_CHANGE_MIN")) { change_min = atoi(getenv("AUTOTOKENS_CHANGE_MIN")); - if (change_min < 1 || change_min > 256) { change_min = AUTOTOKENS_CHANGE_MIN; } + if (change_min < 1 || change_min > 256) { + + change_min = AUTOTOKENS_CHANGE_MIN; + + } } if (getenv("AUTOTOKENS_CHANGE_MAX")) { change_max = atoi(getenv("AUTOTOKENS_CHANGE_MAX")); - if (change_max < 1 || change_max > 4096) { change_max = AUTOTOKENS_CHANGE_MAX; } + if (change_max < 1 || change_max > 4096) { + + change_max = AUTOTOKENS_CHANGE_MAX; + + } } @@ -1212,53 +966,49 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { // set common whitespace tokens // we deliberately do not put uncommon ones here to these will count as // identifier tokens. - if (!alternative_tokenize) { - - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t"] = current_id; - id_to_token[current_id] = "\t"; - ++current_id; - token_to_id["\n"] = current_id; - id_to_token[current_id] = "\n"; - ++current_id; - token_to_id["\r\n"] = current_id; - id_to_token[current_id] = "\r\n"; - ++current_id; - token_to_id[" \n"] = current_id; - id_to_token[current_id] = " \n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t"] = current_id; - id_to_token[current_id] = "\t\t"; - ++current_id; - token_to_id["\n\n"] = current_id; - id_to_token[current_id] = "\n\n"; - ++current_id; - token_to_id["\r\n\r\n"] = current_id; - id_to_token[current_id] = "\r\n\r\n"; - ++current_id; - token_to_id[" "] = current_id; - id_to_token[current_id] = " "; - ++current_id; - token_to_id["\t\t\t\t"] = current_id; - id_to_token[current_id] = "\t\t\t\t"; - ++current_id; - token_to_id["\n\n\n\n"] = current_id; - id_to_token[current_id] = "\n\n\n\n"; - ++current_id; - whitespace_ids = current_id; - token_to_id["\""] = current_id; - id_to_token[current_id] = "\""; - ++current_id; - token_to_id["'"] = current_id; - id_to_token[current_id] = "'"; - ++current_id; - - } + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t"] = current_id; + id_to_token[current_id] = "\t"; + ++current_id; + token_to_id["\n"] = current_id; + id_to_token[current_id] = "\n"; + ++current_id; + token_to_id["\r\n"] = current_id; + id_to_token[current_id] = "\r\n"; + ++current_id; + token_to_id[" \n"] = current_id; + id_to_token[current_id] = " \n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t"] = current_id; + id_to_token[current_id] = "\t\t"; + ++current_id; + token_to_id["\n\n"] = current_id; + id_to_token[current_id] = "\n\n"; + ++current_id; + token_to_id["\r\n\r\n"] = current_id; + id_to_token[current_id] = "\r\n\r\n"; + ++current_id; + token_to_id[" "] = current_id; + id_to_token[current_id] = " "; + ++current_id; + token_to_id["\t\t\t\t"] = current_id; + id_to_token[current_id] = "\t\t\t\t"; + ++current_id; + token_to_id["\n\n\n\n"] = current_id; + id_to_token[current_id] = "\n\n\n\n"; + ++current_id; + whitespace_ids = current_id; + token_to_id["\""] = current_id; + id_to_token[current_id] = "\""; + ++current_id; + token_to_id["'"] = current_id; + id_to_token[current_id] = "'"; + ++current_id; return data; -- cgit v1.2.3 From 240f6421d8240b4b4d4d5bd509c0c3277a083896 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 08:23:47 +0100 Subject: optimize performance --- custom_mutators/autotokens/autotokens.cpp | 80 +++++++++---------------------- 1 file changed, 23 insertions(+), 57 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index a027ac2b..ca738d0b 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -204,31 +204,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ++m_size; DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); - // if we insert an identifier or string we might need whitespace - if (id_to_token[new_item].size() > 1) { - - // need to insert before? - - if (pos && m[pos - 1] >= whitespace_ids && - id_to_token[m[pos - 1]].size() > 1) { - - m.insert(m.begin() + pos, good_whitespace_or_singleval()); - ++m_size; - - } - - if (pos + 1 < m_size && m[pos + 1] >= whitespace_ids && - id_to_token[m[pos + 1]].size() > 1) { - - // need to insert after? - - m.insert(m.begin() + pos + 1, good_whitespace_or_singleval()); - ++m_size; - - } - - } - break; } @@ -283,25 +258,6 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } - // do we need a whitespace/token at the beginning? - if (dst_off && id_to_token[m[dst_off - 1]].size() > 1 && - id_to_token[m[dst_off]].size() > 1) { - - m.insert(m.begin() + dst_off, good_whitespace_or_singleval()); - ++m_size; - - } - - // do we need a whitespace/token at the end? - if (dst_off + n < m_size && - id_to_token[m[dst_off + n - 1]].size() > 1 && - id_to_token[m[dst_off + n]].size() > 1) { - - m.insert(m.begin() + dst_off + n, good_whitespace_or_singleval()); - ++m_size; - - } - break; } @@ -319,19 +275,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } while (unlikely(m[pos] < whitespace_ids)); - // if what we delete will result in a missing whitespace/token, - // instead of deleting we switch the item to a whitespace or token. - if (pos && pos + 1 < m_size && id_to_token[m[pos - 1]].size() > 1 && - id_to_token[m[pos + 1]].size() > 1) { - - m[pos] = good_whitespace_or_singleval(); - - } else { - - m.erase(m.begin() + pos); - --m_size; - - } + m.erase(m.begin() + pos); + --m_size; } else { @@ -350,10 +295,31 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + /* Now we create the output */ + output = ""; + u32 prev_size = 0; for (i = 0; i < m_size; ++i) { + if (likely(i + 1 < m_size)) { + + u32 this_size = id_to_token[m[i]].size(); + + /* The output we are generating might need repairing. + General rule: two items that have a size larger than 2 are strings + or identifizers and need a whitespace or an item of length 1 in + between. */ + if (unlikely(prev_size > 1 && this_size > 1)) { + + output += id_to_token[good_whitespace_or_singleval()]; + + } + + prev_size = this_size; + + } + output += id_to_token[m[i]]; } -- cgit v1.2.3 From 61439859cece05cd3e204af60bb5ff08556c490d Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 08:26:30 +0100 Subject: cleanup --- custom_mutators/autotokens/README | 4 ---- custom_mutators/autotokens/autotokens.cpp | 8 -------- 2 files changed, 12 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index e9c48662..904b5fa3 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -29,7 +29,3 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! `AUTOTOKENS_CREATE_FROM_THIN_AIR` - if only one small start file is present and a dictionary loaded then create one initial structure based on the dictionary. -`AUTOTOKENS_ALTERNATIVE_TOKENIZE` - use an alternative tokenize implementation - (experimental) -`AUTOTOKENS_WHITESPACE` - whitespace string to use for ALTERNATIVE_TOKENIZE, - default is " " diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index ca738d0b..10afa2c2 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -24,7 +24,6 @@ extern "C" { #define AUTOTOKENS_ONLY_FAV 0 #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MAX 64 -#define AUTOTOKENS_WHITESPACE " " #define AUTOTOKENS_SIZE_MIN 8 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 @@ -75,7 +74,6 @@ static unordered_map *> file_mapping; static unordered_map *> id_mapping; static unordered_map token_to_id; static unordered_map id_to_token; -static string whitespace = AUTOTOKENS_WHITESPACE; static string output; static regex *regex_comment_custom; // multiline requires g++-11 libs :( @@ -913,12 +911,6 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { if (change_max < change_min) { change_max = change_min + 1; } - if (getenv("AUTOTOKENS_WHITESPACE")) { - - whitespace = getenv("AUTOTOKENS_WHITESPACE"); - - } - if (getenv("AUTOTOKENS_COMMENT")) { char buf[256]; -- cgit v1.2.3 From 54fa78d32ce6779117a656c72f5c630713e7033f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 09:52:57 +0100 Subject: autodisable and better performance --- custom_mutators/autotokens/Makefile | 12 ++- custom_mutators/autotokens/TODO | 21 ----- custom_mutators/autotokens/autotokens.cpp | 143 +++++++++++++++++++++++------- 3 files changed, 120 insertions(+), 56 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index ab1da4b6..6ee7d324 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -1,16 +1,22 @@ ifdef debug - CFLAGS += -fsanitize=address -Wall + CPPLAGS += -fsanitize=address + CXXFLAGS += -Wall + CC := clang CXX := clang++ endif ifdef DEBUG - CFLAGS += -fsanitize=address -Wall + CPPFLAGS += -fsanitize=address + CXXFLAGS += -Wall + CC := clang CXX := clang++ endif all: autotokens.so autotokens.so: autotokens.cpp - $(CXX) -g -O3 $(CFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ../../src/afl-performance.o + $(CC) -D_STANDALONE_MODULE=1 -I../../include -g -O3 $(CPPFLAGS) -fPIC -c -o ./afl-fuzz-queue.o ../../src/afl-fuzz-queue.c + $(CC) -I../../include -g -O3 $(CPPFLAGS) -DBIN_PATH=\"dummy\" -Wno-pointer-sign -fPIC -c -o ./afl-common.o ../../src/afl-common.c + $(CXX) -Wno-deprecated -g -O3 $(CXXFLAGS) $(CPPFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ./afl-fuzz-queue.o ../../src/afl-performance.o ./afl-common.o clean: rm -f autotokens.so *~ core diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO index 496bfd45..2e99e147 100644 --- a/custom_mutators/autotokens/TODO +++ b/custom_mutators/autotokens/TODO @@ -1,24 +1,3 @@ -create from thin air if no good seed after a cycle and dict large enough? -(static u32 no_of_struct_inputs;) - -splicing -> check if whitespace/token is needed - -whitespace/token check only AFTER mutation - -analyse welche einen DICT haben, und welche davon rein ascii - -corpus analyse: - + libxml - - sqlite - - libpcap -min len, max len, % wenn 95/98/99/100 ascii - env für menge an per mutation run -AFL_TXT_MAX_LEN 65535 -AFL_TXT_MIN_LEN 16 -AFL_TXT_MIN_PERCENT=99 - --> KEIN FAV! - change_min/_max werte diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 10afa2c2..cda90a38 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -25,10 +25,12 @@ extern "C" { #define AUTOTOKENS_CHANGE_MIN 8 #define AUTOTOKENS_CHANGE_MAX 64 #define AUTOTOKENS_SIZE_MIN 8 +#define AUTOTOKENS_SIZE_MAX 65535 #define AUTOTOKENS_SPLICE_MIN 4 #define AUTOTOKENS_SPLICE_MAX 64 #define AUTOTOKENS_CREATE_FROM_THIN_AIR 0 #define AUTOTOKENS_FUZZ_COUNT_SHIFT 0 +#define AUTOTOKENS_AUTO_DISABLE 0 // 0 = no learning, 1 only from -x dict/autodict, 2 also from cmplog #define AUTOTOKENS_LEARN_DICT 1 #ifndef AUTOTOKENS_SPLICE_DISABLE @@ -56,6 +58,8 @@ typedef struct my_mutator { #define IFDEBUG if (unlikely(debug)) static afl_state *afl_ptr; +static int module_disabled = 0; +static int auto_disable = AUTOTOKENS_AUTO_DISABLE; static int debug = AUTOTOKENS_DEBUG; static int only_fav = AUTOTOKENS_ONLY_FAV; static int learn_dictionary_tokens = AUTOTOKENS_LEARN_DICT; @@ -93,6 +97,99 @@ static void first_run(void *data) { (void)(data); + /* For auto-loading this module we check here if we can analyze from the + input if the inputs look like text inputs and disable the module if + not. */ + + if (afl_ptr->custom_only || !auto_disable) { return; } + + if (unlikely(afl_ptr->active_items == 1 && + afl_ptr->queue_cur->len < AFL_TXT_MIN_LEN)) { + + if (afl_ptr->extras_cnt > 8) { + + u32 valid = 0; + + while (extras_cnt < afl_ptr->extras_cnt) { + + u32 ok = 1, l = afl_ptr->extras[extras_cnt].len; + u8 *buf, *ptr = afl_ptr->extras[extras_cnt].data; + + for (u32 i = 0; i < l; ++i) { + + if (!isascii((int)ptr[i]) && !isprint((int)ptr[i])) { + + ok = 0; + break; + + } + + } + + if (ok) { + + buf = (u8 *)malloc(afl_ptr->extras[extras_cnt].len + 1); + memcpy(buf, afl_ptr->extras[extras_cnt].data, + afl_ptr->extras[extras_cnt].len); + buf[afl_ptr->extras[extras_cnt].len] = 0; + token_to_id[(char *)buf] = current_id; + id_to_token[current_id] = (char *)buf; + ++current_id; + ++valid; + + } + + ++extras_cnt; + + } + + if ((valid * 100) / afl_ptr->extras_cnt < 95) { module_disabled = 1; } + + } else { + + module_disabled = 1; + + } + + return; + + } + + u32 is_ascii = 0, valid = 0; + + for (u32 i = 0; i < afl_ptr->queued_items; ++i) { + + struct queue_entry *q; + + q = afl_ptr->queue_buf[i]; + + if (!q->disabled && q->len >= AUTOTOKENS_SIZE_MIN && + q->len <= AFL_TXT_MAX_LEN) { + + ++valid; + u8 *input = queue_testcase_get(afl_ptr, q); + + u32 valid_chars = 0; + for (u32 i = 0; i < q->len; ++i) { + + if (isascii((int)input[i]) || isprint((int)input[i])) { ++valid_chars; } + + } + + // we want at least 99% of text characters ... + if (((q->len * AFL_TXT_MIN_PERCENT) / 100) <= valid_chars) { + + ++is_ascii; + q->is_ascii = 1; + + } + + } + + } + + if ((is_ascii * 100) / valid < 70) { module_disabled = 1; } + } static u32 good_whitespace_or_singleval() { @@ -441,21 +538,25 @@ extern "C" unsigned char afl_custom_queue_get(void *data, is_first_run = 0; first_run(data); + if (module_disabled) { WARNF("Autotokens custom module is disabled."); } + } - if (unlikely(!afl_ptr->custom_only) && !create_from_thin_air && - ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || - (only_fav && !afl_ptr->queue_cur->favored))) { + if (likely(module_disabled) || + (unlikely(!afl_ptr->custom_only) && !create_from_thin_air && + ((afl_ptr->shm.cmplog_mode && !afl_ptr->queue_cur->is_ascii) || + (only_fav && !afl_ptr->queue_cur->favored)))) { s = NULL; - DEBUGF(stderr, "cmplog not ascii or only_fav and not favorite\n"); + DEBUGF(stderr, + "cmplog not ascii or only_fav and not favorite or disabled\n"); return 1; } // check if there are new dictionary entries and add them to the tokens - if (likely(valid_structures || create_from_thin_air) && - learn_state < learn_dictionary_tokens) { + if (unlikely(learn_state < learn_dictionary_tokens) && + likely(valid_structures || create_from_thin_air)) { if (unlikely(!learn_state)) { learn_state = 1; } @@ -569,21 +670,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, if (entry == file_mapping.end()) { // this input file was not analyzed for tokens yet, so let's do it! - - FILE *fp = fopen((char *)filename, "rb"); - if (!fp) { - - s = NULL; - return 1; - - } // should not happen - - fseek(fp, 0, SEEK_END); - size_t len = (size_t)ftell(fp); + size_t len = afl_ptr->queue_cur->len; if (len < AFL_TXT_MIN_LEN) { - fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; DEBUGF(stderr, "Too short (%lu) %s\n", len, filename); @@ -591,7 +681,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } else if (len > AFL_TXT_MAX_LEN) { - fclose(fp); file_mapping[fn] = structure; // NULL ptr so we don't read the file again s = NULL; DEBUGF(stderr, "Too long (%lu) %s\n", len, filename); @@ -599,19 +688,8 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } - string input; - input.resize(len); - rewind(fp); - - if (fread((void *)input.data(), 1, len, fp) != len) { - - s = NULL; - DEBUGF(stderr, "Too short read %s\n", filename); - return 1; - - } - - fclose(fp); + u8 *input_buf = queue_testcase_get(afl_ptr, afl_ptr->queue_cur); + string input((char *)input_buf, afl_ptr->queue_cur->len); if (!afl_ptr->shm.cmplog_mode) { @@ -866,6 +944,7 @@ extern "C" my_mutator_t *afl_custom_init(afl_state *afl, unsigned int seed) { } if (getenv("AUTOTOKENS_DEBUG")) { debug = 1; } + if (getenv("AUTOTOKENS_AUTO_DISABLE")) { auto_disable = 1; } if (getenv("AUTOTOKENS_ONLY_FAV")) { only_fav = 1; } if (getenv("AUTOTOKENS_CREATE_FROM_THIN_AIR")) { create_from_thin_air = 1; } -- cgit v1.2.3 From 5a0100c6eece0d668c7040ec6e6ed3f59ef0d1ba Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 13 Feb 2023 10:01:02 +0100 Subject: add to readme --- custom_mutators/autotokens/README | 2 ++ 1 file changed, 2 insertions(+) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 904b5fa3..295cd736 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -20,6 +20,8 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! removed. Default: `/* ... */` `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the number of fuzzing performed, shifting the value by this number set, e.g. 1. +`AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii + (or no input and no (ascii) dictionary) `AUTOTOKENS_LEARN_DICT` - learn from dictionaries? 0 = none 1 = only -x or autodict -- cgit v1.2.3 From 668f5e1fa9c126bb8c751a6e4ef038ae60a442fa Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 09:32:32 +0100 Subject: debug output --- custom_mutators/autotokens/Makefile | 8 ++++++-- custom_mutators/autotokens/autotokens.cpp | 17 ++++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/Makefile b/custom_mutators/autotokens/Makefile index 6ee7d324..0daba17d 100644 --- a/custom_mutators/autotokens/Makefile +++ b/custom_mutators/autotokens/Makefile @@ -13,10 +13,14 @@ endif all: autotokens.so -autotokens.so: autotokens.cpp +afl-fuzz-queue.o: ../../src/afl-fuzz-queue.c $(CC) -D_STANDALONE_MODULE=1 -I../../include -g -O3 $(CPPFLAGS) -fPIC -c -o ./afl-fuzz-queue.o ../../src/afl-fuzz-queue.c + +afl-common.o: ../../src/afl-common.c $(CC) -I../../include -g -O3 $(CPPFLAGS) -DBIN_PATH=\"dummy\" -Wno-pointer-sign -fPIC -c -o ./afl-common.o ../../src/afl-common.c + +autotokens.so: afl-fuzz-queue.o afl-common.o autotokens.cpp $(CXX) -Wno-deprecated -g -O3 $(CXXFLAGS) $(CPPFLAGS) -shared -fPIC -o autotokens.so -I../../include autotokens.cpp ./afl-fuzz-queue.o ../../src/afl-performance.o ./afl-common.o clean: - rm -f autotokens.so *~ core + rm -f autotokens.so *.o *~ core diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index cda90a38..043d9588 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -145,6 +145,9 @@ static void first_run(void *data) { if ((valid * 100) / afl_ptr->extras_cnt < 95) { module_disabled = 1; } + DEBUGF(stderr, "DICT: valid %u, total %u, %u < 95 == disable\n", valid, + afl_ptr->extras_cnt, (u32)((valid * 100) / afl_ptr->extras_cnt)); + } else { module_disabled = 1; @@ -190,6 +193,10 @@ static void first_run(void *data) { if ((is_ascii * 100) / valid < 70) { module_disabled = 1; } + DEBUGF(stderr, "seeds: total %u, valid %u, ascii %u, %u < 70 == disabled\n", + afl_ptr->active_items, valid, is_ascii, + (u32)((is_ascii * 100) / valid)); + } static u32 good_whitespace_or_singleval() { @@ -538,7 +545,15 @@ extern "C" unsigned char afl_custom_queue_get(void *data, is_first_run = 0; first_run(data); - if (module_disabled) { WARNF("Autotokens custom module is disabled."); } + if (module_disabled) { + + WARNF("Autotokens custom module is disabled."); + + } else if (auto_disable) { + + OKF("Autotokens custom module is enabled."); + + } } -- cgit v1.2.3 From 2090f17a9bb9cc225c1d24e8b21ed0c993a2665f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:23:42 +0100 Subject: opt --- custom_mutators/autotokens/autotokens.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 043d9588..a2b2814f 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -143,9 +143,9 @@ static void first_run(void *data) { } - if ((valid * 100) / afl_ptr->extras_cnt < 95) { module_disabled = 1; } + if ((valid * 100) / afl_ptr->extras_cnt <= 70) { module_disabled = 1; } - DEBUGF(stderr, "DICT: valid %u, total %u, %u < 95 == disable\n", valid, + DEBUGF(stderr, "DICT: valid %u, total %u, %u <= 70 == disable\n", valid, afl_ptr->extras_cnt, (u32)((valid * 100) / afl_ptr->extras_cnt)); } else { @@ -191,9 +191,9 @@ static void first_run(void *data) { } - if ((is_ascii * 100) / valid < 70) { module_disabled = 1; } + if ((is_ascii * 100) / valid <= 70) { module_disabled = 1; } - DEBUGF(stderr, "seeds: total %u, valid %u, ascii %u, %u < 70 == disabled\n", + DEBUGF(stderr, "seeds: total %u, valid %u, ascii %u, %u <= 70 == disabled\n", afl_ptr->active_items, valid, is_ascii, (u32)((is_ascii * 100) / valid)); -- cgit v1.2.3 From 04356ecbbe2c6cb72d279081702a6044fcc3ae92 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:28:43 +0100 Subject: fix --- custom_mutators/autotokens/autotokens.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index a2b2814f..b1f1542e 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -145,8 +145,9 @@ static void first_run(void *data) { if ((valid * 100) / afl_ptr->extras_cnt <= 70) { module_disabled = 1; } - DEBUGF(stderr, "DICT: valid %u, total %u, %u <= 70 == disable\n", valid, - afl_ptr->extras_cnt, (u32)((valid * 100) / afl_ptr->extras_cnt)); + DEBUGF(stderr, "DICT: total %u, valid %u, %u <= 70 == disable\n", + afl_ptr->extras_cnt, valid, + (u32)((valid * 100) / afl_ptr->extras_cnt)); } else { -- cgit v1.2.3 From ae94499503596d1e7f45e1a93bc5f7148c6163b6 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:48:49 +0100 Subject: fix --- custom_mutators/autotokens/autotokens.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index b1f1542e..e6b9931d 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -1078,6 +1078,8 @@ extern "C" void afl_custom_deinit(my_mutator_t *data) { /* we use this to print statistics at exit :-) needs to be stderr as stdout is filtered */ + if (module_disabled) { return; } + fprintf(stderr, "\n\nAutotoken mutator statistics:\n" " Number of all seen tokens: %u\n" -- cgit v1.2.3 From 7f2bafbb8b709720cd3703789071c08064e518bd Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 15 Feb 2023 11:54:39 +0100 Subject: remove some debug --- custom_mutators/autotokens/autotokens.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index e6b9931d..22c78a60 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -287,7 +287,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, ((whitespace_ids < new_item && whitespace_ids >= cur_item) || (whitespace_ids >= new_item && whitespace_ids < cur_item)))); - DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); + // DEBUGF(stderr, "MUT: %u -> %u\n", cur_item, new_item); m[pos] = new_item; break; @@ -305,7 +305,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, u32 pos = rand_below(afl_ptr, m_size + 1); m.insert(m.begin() + pos, new_item); ++m_size; - DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); + // DEBUGF(stderr, "INS: %u at %u\n", new_item, pos); break; @@ -334,7 +334,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, m.insert(m.begin() + dst_off, src->begin() + src_off, src->begin() + src_off + n); m_size += n; - DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); + // DEBUGF(stderr, "SPLICE-INS: %u at %u\n", n, dst_off); break; @@ -354,7 +354,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, copy(src->begin() + src_off, src->begin() + src_off + n, m.begin() + dst_off); - DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); + // DEBUGF(stderr, "SPLICE-MUT: %u at %u\n", n, dst_off); break; } @@ -432,6 +432,7 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, if (unlikely(mutated_size > max_size)) { mutated_size = max_size; } + /* IFDEBUG { DEBUGF(stderr, "MUTATED to %u bytes:\n", mutated_size); @@ -440,6 +441,8 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, } + */ + *out_buf = mutated_out; ++fuzz_count; return mutated_size; @@ -633,7 +636,6 @@ extern "C" unsigned char afl_custom_queue_get(void *data, } ++a_extras_cnt; - DEBUGF(stderr, "Added from auto dictionary: \"%s\"\n", ptr); } @@ -751,8 +753,10 @@ extern "C" unsigned char afl_custom_queue_get(void *data, u32 tabs = count(input.begin(), input.end(), '\t'); u32 linefeeds = count(input.begin(), input.end(), '\n'); bool ends_with_linefeed = input[input.length() - 1] == '\n'; + DEBUGF(stderr, "spaces=%u tabs=%u linefeeds=%u ends=%u\n", spaces, tabs, linefeeds, ends_with_linefeed); + all_spaces += spaces; all_tabs += tabs; all_lf += linefeeds; -- cgit v1.2.3 From 1faf6f67313e726c645ac3b9ecd2d8b5e65f605a Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 16 Feb 2023 07:47:36 +0100 Subject: fix --- custom_mutators/autotokens/autotokens.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/autotokens.cpp b/custom_mutators/autotokens/autotokens.cpp index 22c78a60..8135aba1 100644 --- a/custom_mutators/autotokens/autotokens.cpp +++ b/custom_mutators/autotokens/autotokens.cpp @@ -401,25 +401,28 @@ extern "C" size_t afl_custom_fuzz(my_mutator_t *data, u8 *buf, size_t buf_size, /* Now we create the output */ output = ""; - u32 prev_size = 0; + u32 prev_size = 1, was_whitespace = 1; for (i = 0; i < m_size; ++i) { if (likely(i + 1 < m_size)) { u32 this_size = id_to_token[m[i]].size(); + u32 is_whitespace = m[i] < whitespace_ids; /* The output we are generating might need repairing. General rule: two items that have a size larger than 2 are strings or identifizers and need a whitespace or an item of length 1 in between. */ - if (unlikely(prev_size > 1 && this_size > 1)) { + if (unlikely(!(prev_size == 1 || was_whitespace || this_size == 1 || + is_whitespace))) { output += id_to_token[good_whitespace_or_singleval()]; } prev_size = this_size; + was_whitespace = is_whitespace; } -- cgit v1.2.3 From add2eb42c0f0e2b590fcb17427e5fce29c2fdd54 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 23 Feb 2023 15:26:41 +0100 Subject: nits --- custom_mutators/autotokens/README | 7 ++++--- custom_mutators/autotokens/TODO | 3 --- 2 files changed, 4 insertions(+), 6 deletions(-) delete mode 100644 custom_mutators/autotokens/TODO (limited to 'custom_mutators') diff --git a/custom_mutators/autotokens/README b/custom_mutators/autotokens/README index 295cd736..cca168fd 100644 --- a/custom_mutators/autotokens/README +++ b/custom_mutators/autotokens/README @@ -1,8 +1,9 @@ -# autotokens +# Autotokens This implements an improved autotoken grammar fuzzing idea presented in [Token-Level Fuzzing][https://www.usenix.org/system/files/sec21-salls.pdf]. -It is a grammar fuzzer without actually knowing the grammar. +It is a grammar fuzzer without actually knowing the grammar, but only works +with text based inputs. It is recommended to run with together in an instance with `CMPLOG`. @@ -19,7 +20,7 @@ Do **not** set `AFL_DISABLE_TRIM` with this custom mutator! `AUTOTOKENS_COMMENT` - what character or string starts a comment which will be removed. Default: `/* ... */` `AUTOTOKENS_FUZZ_COUNT_SHIFT` - reduce the number of fuzzing performed, shifting - the value by this number set, e.g. 1. + the value by this number, e.g. 1. `AUTOTOKENS_AUTO_DISABLE` - disable this module if the seeds are not ascii (or no input and no (ascii) dictionary) `AUTOTOKENS_LEARN_DICT` - learn from dictionaries? diff --git a/custom_mutators/autotokens/TODO b/custom_mutators/autotokens/TODO deleted file mode 100644 index 2e99e147..00000000 --- a/custom_mutators/autotokens/TODO +++ /dev/null @@ -1,3 +0,0 @@ -env für menge an per mutation run - -change_min/_max werte -- cgit v1.2.3 From e12acaa20367f335549c2db97b88ac5c8ffbeab7 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 15 Apr 2023 10:12:20 +0200 Subject: fix custom mutator C examples --- custom_mutators/examples/custom_mutator_helpers.h | 342 ---------------------- custom_mutators/examples/custom_send.c | 8 +- custom_mutators/examples/example.c | 116 +++----- custom_mutators/examples/post_library_gif.so.c | 53 +--- custom_mutators/examples/post_library_png.so.c | 24 +- custom_mutators/examples/simple_example.c | 28 +- 6 files changed, 72 insertions(+), 499 deletions(-) delete mode 100644 custom_mutators/examples/custom_mutator_helpers.h (limited to 'custom_mutators') diff --git a/custom_mutators/examples/custom_mutator_helpers.h b/custom_mutators/examples/custom_mutator_helpers.h deleted file mode 100644 index 62e6efba..00000000 --- a/custom_mutators/examples/custom_mutator_helpers.h +++ /dev/null @@ -1,342 +0,0 @@ -#ifndef CUSTOM_MUTATOR_HELPERS -#define CUSTOM_MUTATOR_HELPERS - -#include "config.h" -#include "types.h" -#include - -#define INITIAL_GROWTH_SIZE (64) - -#define RAND_BELOW(limit) (rand() % (limit)) - -/* Use in a struct: creates a name_buf and a name_size variable. */ -#define BUF_VAR(type, name) \ - type * name##_buf; \ - size_t name##_size; -/* this fills in `&structptr->something_buf, &structptr->something_size`. */ -#define BUF_PARAMS(struct, name) \ - (void **)&struct->name##_buf, &struct->name##_size - -typedef struct { - -} afl_t; - -static void surgical_havoc_mutate(u8 *out_buf, s32 begin, s32 end) { - - static s8 interesting_8[] = {INTERESTING_8}; - static s16 interesting_16[] = {INTERESTING_8, INTERESTING_16}; - static s32 interesting_32[] = {INTERESTING_8, INTERESTING_16, INTERESTING_32}; - - switch (RAND_BELOW(12)) { - - case 0: { - - /* Flip a single bit somewhere. Spooky! */ - - s32 bit_idx = ((RAND_BELOW(end - begin) + begin) << 3) + RAND_BELOW(8); - - out_buf[bit_idx >> 3] ^= 128 >> (bit_idx & 7); - - break; - - } - - case 1: { - - /* Set byte to interesting value. */ - - u8 val = interesting_8[RAND_BELOW(sizeof(interesting_8))]; - out_buf[(RAND_BELOW(end - begin) + begin)] = val; - - break; - - } - - case 2: { - - /* Set word to interesting value, randomly choosing endian. */ - - if (end - begin < 2) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 1) break; - - switch (RAND_BELOW(2)) { - - case 0: - *(u16 *)(out_buf + byte_idx) = - interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]; - break; - case 1: - *(u16 *)(out_buf + byte_idx) = - SWAP16(interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]); - break; - - } - - break; - - } - - case 3: { - - /* Set dword to interesting value, randomly choosing endian. */ - - if (end - begin < 4) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 3) break; - - switch (RAND_BELOW(2)) { - - case 0: - *(u32 *)(out_buf + byte_idx) = - interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]; - break; - case 1: - *(u32 *)(out_buf + byte_idx) = - SWAP32(interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]); - break; - - } - - break; - - } - - case 4: { - - /* Set qword to interesting value, randomly choosing endian. */ - - if (end - begin < 8) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 7) break; - - switch (RAND_BELOW(2)) { - - case 0: - *(u64 *)(out_buf + byte_idx) = - (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]; - break; - case 1: - *(u64 *)(out_buf + byte_idx) = SWAP64( - (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]); - break; - - } - - break; - - } - - case 5: { - - /* Randomly subtract from byte. */ - - out_buf[(RAND_BELOW(end - begin) + begin)] -= 1 + RAND_BELOW(ARITH_MAX); - - break; - - } - - case 6: { - - /* Randomly add to byte. */ - - out_buf[(RAND_BELOW(end - begin) + begin)] += 1 + RAND_BELOW(ARITH_MAX); - - break; - - } - - case 7: { - - /* Randomly subtract from word, random endian. */ - - if (end - begin < 2) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 1) break; - - if (RAND_BELOW(2)) { - - *(u16 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u16 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u16 *)(out_buf + byte_idx) = - SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) - num); - - } - - break; - - } - - case 8: { - - /* Randomly add to word, random endian. */ - - if (end - begin < 2) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 1) break; - - if (RAND_BELOW(2)) { - - *(u16 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u16 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u16 *)(out_buf + byte_idx) = - SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) + num); - - } - - break; - - } - - case 9: { - - /* Randomly subtract from dword, random endian. */ - - if (end - begin < 4) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 3) break; - - if (RAND_BELOW(2)) { - - *(u32 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u32 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u32 *)(out_buf + byte_idx) = - SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) - num); - - } - - break; - - } - - case 10: { - - /* Randomly add to dword, random endian. */ - - if (end - begin < 4) break; - - s32 byte_idx = (RAND_BELOW(end - begin) + begin); - - if (byte_idx >= end - 3) break; - - if (RAND_BELOW(2)) { - - *(u32 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX); - - } else { - - u32 num = 1 + RAND_BELOW(ARITH_MAX); - - *(u32 *)(out_buf + byte_idx) = - SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) + num); - - } - - break; - - } - - case 11: { - - /* Just set a random byte to a random value. Because, - why not. We use XOR with 1-255 to eliminate the - possibility of a no-op. */ - - out_buf[(RAND_BELOW(end - begin) + begin)] ^= 1 + RAND_BELOW(255); - - break; - - } - - } - -} - -/* This function calculates the next power of 2 greater or equal its argument. - @return The rounded up power of 2 (if no overflow) or 0 on overflow. -*/ -static inline size_t next_pow2(size_t in) { - - if (in == 0 || in > (size_t)-1) - return 0; /* avoid undefined behaviour under-/overflow */ - size_t out = in - 1; - out |= out >> 1; - out |= out >> 2; - out |= out >> 4; - out |= out >> 8; - out |= out >> 16; - return out + 1; - -} - -/* This function makes sure *size is > size_needed after call. - It will realloc *buf otherwise. - *size will grow exponentially as per: - https://blog.mozilla.org/nnethercote/2014/11/04/please-grow-your-buffers-exponentially/ - Will return NULL and free *buf if size_needed is <1 or realloc failed. - @return For convenience, this function returns *buf. - */ -static inline void *maybe_grow(void **buf, size_t *size, size_t size_needed) { - - /* No need to realloc */ - if (likely(size_needed && *size >= size_needed)) return *buf; - - /* No initial size was set */ - if (size_needed < INITIAL_GROWTH_SIZE) size_needed = INITIAL_GROWTH_SIZE; - - /* grow exponentially */ - size_t next_size = next_pow2(size_needed); - - /* handle overflow */ - if (!next_size) { next_size = size_needed; } - - /* alloc */ - *buf = realloc(*buf, next_size); - *size = *buf ? next_size : 0; - - return *buf; - -} - -/* Swaps buf1 ptr and buf2 ptr, as well as their sizes */ -static inline void afl_swap_bufs(void **buf1, size_t *size1, void **buf2, - size_t *size2) { - - void * scratch_buf = *buf1; - size_t scratch_size = *size1; - *buf1 = *buf2; - *size1 = *size2; - *buf2 = scratch_buf; - *size2 = scratch_size; - -} - -#undef INITIAL_GROWTH_SIZE - -#endif - diff --git a/custom_mutators/examples/custom_send.c b/custom_mutators/examples/custom_send.c index 7de72819..9cc4b160 100644 --- a/custom_mutators/examples/custom_send.c +++ b/custom_mutators/examples/custom_send.c @@ -10,21 +10,21 @@ // afl-fuzz -i in -o out -- ./test-instr -f /tmp/foo // -#include "custom_mutator_helpers.h" - #include #include #include #include #include +#include "afl-fuzz.h" + typedef struct my_mutator { - afl_t *afl; + afl_state_t *afl; } my_mutator_t; -my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { +my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) { my_mutator_t *data = calloc(1, sizeof(my_mutator_t)); if (!data) { diff --git a/custom_mutators/examples/example.c b/custom_mutators/examples/example.c index e680ec8e..42c7469c 100644 --- a/custom_mutators/examples/example.c +++ b/custom_mutators/examples/example.c @@ -7,7 +7,7 @@ */ // You need to use -I/path/to/AFLplusplus/include -I. -#include "custom_mutator_helpers.h" +#include "afl-fuzz.h" #include #include @@ -26,19 +26,14 @@ static const char *commands[] = { typedef struct my_mutator { - afl_t *afl; + afl_state_t *afl; // any additional data here! size_t trim_size_current; int trimmming_steps; int cur_step; - // Reused buffers: - BUF_VAR(u8, fuzz); - BUF_VAR(u8, data); - BUF_VAR(u8, havoc); - BUF_VAR(u8, trim); - BUF_VAR(u8, post_process); + u8 *mutated_out, *post_process_buf, *trim_buf; } my_mutator_t; @@ -53,7 +48,7 @@ typedef struct my_mutator { * There may be multiple instances of this mutator in one afl-fuzz run! * Return NULL on error. */ -my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { +my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) { srand(seed); // needed also by surgical_havoc_mutate() @@ -65,6 +60,27 @@ my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { } + if ((data->mutated_out = (u8 *)malloc(MAX_FILE)) == NULL) { + + perror("afl_custom_init malloc"); + return NULL; + + } + + if ((data->post_process_buf = (u8 *)malloc(MAX_FILE)) == NULL) { + + perror("afl_custom_init malloc"); + return NULL; + + } + + if ((data->trim_buf = (u8 *)malloc(MAX_FILE)) == NULL) { + + perror("afl_custom_init malloc"); + return NULL; + + } + data->afl = afl; return data; @@ -96,31 +112,14 @@ size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, // the fuzzer size_t mutated_size = DATA_SIZE <= max_size ? DATA_SIZE : max_size; - // maybe_grow is optimized to be quick for reused buffers. - u8 *mutated_out = maybe_grow(BUF_PARAMS(data, fuzz), mutated_size); - if (!mutated_out) { - - *out_buf = NULL; - perror("custom mutator allocation (maybe_grow)"); - return 0; /* afl-fuzz will very likely error out after this. */ - - } + memcpy(data->mutated_out, buf, buf_size); // Randomly select a command string to add as a header to the packet - memcpy(mutated_out, commands[rand() % 3], 3); + memcpy(data->mutated_out, commands[rand() % 3], 3); - // Mutate the payload of the packet - int i; - for (i = 0; i < 8; ++i) { + if (mutated_size > max_size) { mutated_size = max_size; } - // Randomly perform one of the (no len modification) havoc mutations - surgical_havoc_mutate(mutated_out, 3, mutated_size); - - } - - if (max_size > mutated_size) { mutated_size = max_size; } - - *out_buf = mutated_out; + *out_buf = data->mutated_out; return mutated_size; } @@ -144,24 +143,16 @@ size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, size_t afl_custom_post_process(my_mutator_t *data, uint8_t *buf, size_t buf_size, uint8_t **out_buf) { - uint8_t *post_process_buf = - maybe_grow(BUF_PARAMS(data, post_process), buf_size + 5); - if (!post_process_buf) { + if (buf_size + 5 > MAX_FILE) { buf_size = MAX_FILE - 5; } - perror("custom mutator realloc failed."); - *out_buf = NULL; - return 0; + memcpy(data->post_process_buf + 5, buf, buf_size); + data->post_process_buf[0] = 'A'; + data->post_process_buf[1] = 'F'; + data->post_process_buf[2] = 'L'; + data->post_process_buf[3] = '+'; + data->post_process_buf[4] = '+'; - } - - memcpy(post_process_buf + 5, buf, buf_size); - post_process_buf[0] = 'A'; - post_process_buf[1] = 'F'; - post_process_buf[2] = 'L'; - post_process_buf[3] = '+'; - post_process_buf[4] = '+'; - - *out_buf = post_process_buf; + *out_buf = data->post_process_buf; return buf_size + 5; @@ -197,13 +188,6 @@ int32_t afl_custom_init_trim(my_mutator_t *data, uint8_t *buf, data->cur_step = 0; - if (!maybe_grow(BUF_PARAMS(data, trim), buf_size)) { - - perror("init_trim grow"); - return -1; - - } - memcpy(data->trim_buf, buf, buf_size); data->trim_size_current = buf_size; @@ -284,27 +268,11 @@ int32_t afl_custom_post_trim(my_mutator_t *data, int success) { size_t afl_custom_havoc_mutation(my_mutator_t *data, u8 *buf, size_t buf_size, u8 **out_buf, size_t max_size) { - if (buf_size == 0) { - - *out_buf = maybe_grow(BUF_PARAMS(data, havoc), 1); - if (!*out_buf) { - - perror("custom havoc: maybe_grow"); - return 0; - - } + *out_buf = buf; // in-place mutation - **out_buf = rand() % 256; - buf_size = 1; - - } else { - - // We reuse buf here. It's legal and faster. - *out_buf = buf; - - } + if (buf_size <= sizeof(size_t)) { return buf_size; } - size_t victim = rand() % buf_size; + size_t victim = rand() % (buf_size - sizeof(size_t)); (*out_buf)[victim] += rand() % 10; return buf_size; @@ -371,9 +339,7 @@ uint8_t afl_custom_queue_new_entry(my_mutator_t *data, void afl_custom_deinit(my_mutator_t *data) { free(data->post_process_buf); - free(data->havoc_buf); - free(data->data_buf); - free(data->fuzz_buf); + free(data->mutated_out); free(data->trim_buf); free(data); diff --git a/custom_mutators/examples/post_library_gif.so.c b/custom_mutators/examples/post_library_gif.so.c index 3cb018a6..6737c627 100644 --- a/custom_mutators/examples/post_library_gif.so.c +++ b/custom_mutators/examples/post_library_gif.so.c @@ -45,9 +45,8 @@ 1) If you don't want to modify the test case, simply set `*out_buf = in_buf` and return the original `len`. - NOTE: the following is currently NOT true, we abort in this case! 2) If you want to skip this test case altogether and have AFL generate a - new one, return 0 or set `*out_buf = NULL`. + new one, return 0. Use this sparingly - it's faster than running the target program with patently useless inputs, but still wastes CPU time. @@ -59,8 +58,6 @@ Note that the buffer will *not* be freed for you. To avoid memory leaks, you need to free it or reuse it on subsequent calls (as shown below). - *** Feel free to reuse the original 'in_buf' BUFFER and return it. *** - Alright. The example below shows a simple postprocessor that tries to make sure that all input files start with "GIF89a". @@ -72,7 +69,7 @@ #include #include #include -#include "alloc-inl.h" +#include "afl-fuzz.h" /* Header that must be present at the beginning of every test case: */ @@ -80,8 +77,7 @@ typedef struct post_state { - unsigned char *buf; - size_t size; + size_t size; } post_state_t; @@ -95,15 +91,6 @@ void *afl_custom_init(void *afl) { } - state->buf = calloc(sizeof(unsigned char), 4096); - if (!state->buf) { - - free(state); - perror("calloc"); - return NULL; - - } - return state; } @@ -113,6 +100,10 @@ void *afl_custom_init(void *afl) { size_t afl_custom_post_process(post_state_t *data, unsigned char *in_buf, unsigned int len, unsigned char **out_buf) { + /* we do in-place modification as we do not increase the size */ + + *out_buf = in_buf; + /* Skip execution altogether for buffers shorter than 6 bytes (just to show how it's done). We can trust len to be sane. */ @@ -120,34 +111,7 @@ size_t afl_custom_post_process(post_state_t *data, unsigned char *in_buf, /* Do nothing for buffers that already start with the expected header. */ - if (!memcmp(in_buf, HEADER, strlen(HEADER))) { - - *out_buf = in_buf; - return len; - - } - - /* Allocate memory for new buffer, reusing previous allocation if - possible. Note we have to use afl-fuzz's own realloc! - We use afl_realloc because it is effective. - You can also work within in_buf, and assign it to *out_buf. */ - - *out_buf = afl_realloc(out_buf, len); - - /* If we're out of memory, the most graceful thing to do is to return the - original buffer and give up on modifying it. Let AFL handle OOM on its - own later on. */ - - if (!*out_buf) { - - *out_buf = in_buf; - return len; - - } - - if (len > strlen(HEADER)) - memcpy(*out_buf + strlen(HEADER), in_buf + strlen(HEADER), - len - strlen(HEADER)); + if (!memcmp(in_buf, HEADER, strlen(HEADER))) { return len; } /* Insert the new header. */ @@ -162,7 +126,6 @@ size_t afl_custom_post_process(post_state_t *data, unsigned char *in_buf, /* Gets called afterwards */ void afl_custom_deinit(post_state_t *data) { - free(data->buf); free(data); } diff --git a/custom_mutators/examples/post_library_png.so.c b/custom_mutators/examples/post_library_png.so.c index cd65b1bc..652da497 100644 --- a/custom_mutators/examples/post_library_png.so.c +++ b/custom_mutators/examples/post_library_png.so.c @@ -30,7 +30,7 @@ #include #include #include -#include "alloc-inl.h" +#include "afl-fuzz.h" /* A macro to round an integer up to 4 kB. */ @@ -53,7 +53,7 @@ void *afl_custom_init(void *afl) { } - state->buf = calloc(sizeof(unsigned char), 4096); + state->buf = calloc(sizeof(unsigned char), MAX_FILE); if (!state->buf) { free(state); @@ -80,21 +80,7 @@ size_t afl_custom_post_process(post_state_t *data, const unsigned char *in_buf, } - /* This is not a good way to do it, if you do not need to grow the buffer - then just work with in_buf instead for speed reasons. - But we want to show how to grow a buffer, so this is how it's done: */ - - unsigned int pos = 8; - unsigned char *new_buf = afl_realloc(out_buf, UP4K(len)); - - if (!new_buf) { - - *out_buf = in_buf; - return len; - - } - - memcpy(new_buf, in_buf, len); + unsigned int pos = 8; /* Minimum size of a zero-length PNG chunk is 12 bytes; if we don't have that, we can bail out. */ @@ -124,7 +110,7 @@ size_t afl_custom_post_process(post_state_t *data, const unsigned char *in_buf, if (real_cksum != file_cksum) { - *(uint32_t *)(new_buf + pos + 8 + chunk_len) = real_cksum; + *(uint32_t *)(data->buf + pos + 8 + chunk_len) = real_cksum; } @@ -134,7 +120,7 @@ size_t afl_custom_post_process(post_state_t *data, const unsigned char *in_buf, } - *out_buf = new_buf; + *out_buf = data->buf; return len; } diff --git a/custom_mutators/examples/simple_example.c b/custom_mutators/examples/simple_example.c index d888ec1f..2c0abe29 100644 --- a/custom_mutators/examples/simple_example.c +++ b/custom_mutators/examples/simple_example.c @@ -1,6 +1,6 @@ // This simple example just creates random buffer <= 100 filled with 'A' // needs -I /path/to/AFLplusplus/include -#include "custom_mutator_helpers.h" +#include "afl-fuzz.h" #include #include @@ -13,14 +13,14 @@ typedef struct my_mutator { - afl_t *afl; + afl_state_t *afl; // Reused buffers: - BUF_VAR(u8, fuzz); + u8 *fuzz_buf; } my_mutator_t; -my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { +my_mutator_t *afl_custom_init(afl_state_t *afl, unsigned int seed) { srand(seed); my_mutator_t *data = calloc(1, sizeof(my_mutator_t)); @@ -31,6 +31,14 @@ my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) { } + data->fuzz_buf = (u8 *)malloc(MAX_FILE); + if (!data->fuzz_buf) { + + perror("afl_custom_init malloc"); + return NULL; + + } + data->afl = afl; return data; @@ -44,18 +52,10 @@ size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size, int size = (rand() % 100) + 1; if (size > max_size) size = max_size; - u8 *mutated_out = maybe_grow(BUF_PARAMS(data, fuzz), size); - if (!mutated_out) { - - *out_buf = NULL; - perror("custom mutator allocation (maybe_grow)"); - return 0; /* afl-fuzz will very likely error out after this. */ - - } - memset(mutated_out, _FIXED_CHAR, size); + memset(data->fuzz_buf, _FIXED_CHAR, size); - *out_buf = mutated_out; + *out_buf = data->fuzz_buf; return size; } -- cgit v1.2.3