diff options
author | vanhauser-thc <vh@thc.org> | 2021-10-17 13:05:33 +0200 |
---|---|---|
committer | vanhauser-thc <vh@thc.org> | 2021-10-17 13:05:33 +0200 |
commit | ed10f3783bd8fab33ab5750f56bf87ed008f28ed (patch) | |
tree | 089ce2615c348812175533e9e07a5bb0ba092551 /src/afl-fuzz-queue.c | |
parent | 34f1074ba308e850feb08c51aad781f7d307a260 (diff) | |
download | afl++-ed10f3783bd8fab33ab5750f56bf87ed008f28ed.tar.gz |
new rtn cmplog: instrumentation side + supporting functions
Diffstat (limited to 'src/afl-fuzz-queue.c')
-rw-r--r-- | src/afl-fuzz-queue.c | 91 |
1 files changed, 90 insertions, 1 deletions
diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index 16af2c6b..718f7cb6 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -315,7 +315,96 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) { } -/* check if ascii or UTF-8 */ +/* check if pointer is ascii or UTF-8 */ + +u8 check_if_text_buf(u8 *buf, u32 len) { + + u32 offset = 0, ascii = 0, utf8 = 0; + + while (offset < len) { + + // ASCII: <= 0x7F to allow ASCII control characters + if ((buf[offset + 0] == 0x09 || buf[offset + 0] == 0x0A || + buf[offset + 0] == 0x0D || + (0x20 <= buf[offset + 0] && buf[offset + 0] <= 0x7E))) { + + offset++; + utf8++; + ascii++; + continue; + + } + + if (isascii((int)buf[offset]) || isprint((int)buf[offset])) { + + ascii++; + // we continue though as it can also be a valid utf8 + + } + + // non-overlong 2-byte + if (len - offset > 1 && + ((0xC2 <= buf[offset + 0] && buf[offset + 0] <= 0xDF) && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF))) { + + offset += 2; + utf8++; + continue; + + } + + // excluding overlongs + if ((len - offset > 2) && + ((buf[offset + 0] == 0xE0 && + (0xA0 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && + buf[offset + 2] <= 0xBF)) || // straight 3-byte + (((0xE1 <= buf[offset + 0] && buf[offset + 0] <= 0xEC) || + buf[offset + 0] == 0xEE || buf[offset + 0] == 0xEF) && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && + buf[offset + 2] <= 0xBF)) || // excluding surrogates + (buf[offset + 0] == 0xED && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x9F) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF)))) { + + offset += 3; + utf8++; + continue; + + } + + // planes 1-3 + if ((len - offset > 3) && + ((buf[offset + 0] == 0xF0 && + (0x90 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && + (0x80 <= buf[offset + 3] && + buf[offset + 3] <= 0xBF)) || // planes 4-15 + ((0xF1 <= buf[offset + 0] && buf[offset + 0] <= 0xF3) && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && + (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)) || // plane 16 + (buf[offset + 0] == 0xF4 && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x8F) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && + (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)))) { + + offset += 4; + utf8++; + continue; + + } + + offset++; + + } + + return (utf8 > ascii ? utf8 : ascii); + +} + +/* check if queue entry is ascii or UTF-8 */ static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) { |