From ed10f3783bd8fab33ab5750f56bf87ed008f28ed Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 17 Oct 2021 13:05:33 +0200 Subject: new rtn cmplog: instrumentation side + supporting functions --- src/afl-fuzz-one.c | 25 +++++++++++---- src/afl-fuzz-queue.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/afl-fuzz-stats.c | 3 +- src/afl-fuzz.c | 14 ++++++++ 4 files changed, 125 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 17749601..3217fb0f 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -448,11 +448,11 @@ u8 fuzz_one_original(afl_state_t *afl) { ACTF( "Fuzzing test case #%u (%u total, %llu uniq crashes found, " - "perf_score=%0.0f, exec_us=%llu, hits=%u, map=%u)...", + "perf_score=%0.0f, exec_us=%llu, hits=%u, map=%u, ascii=%u)...", afl->current_entry, afl->queued_paths, afl->unique_crashes, afl->queue_cur->perf_score, afl->queue_cur->exec_us, likely(afl->n_fuzz) ? afl->n_fuzz[afl->queue_cur->n_fuzz_entry] : 0, - afl->queue_cur->bitmap_size); + afl->queue_cur->bitmap_size, afl->queue_cur->is_ascii); fflush(stdout); } @@ -2003,11 +2003,16 @@ havoc_stage: where we take the input file and make random stacked tweaks. */ #define MAX_HAVOC_ENTRY 59 /* 55 to 60 */ +#define MUTATE_ASCII_DICT 64 u32 r_max, r; r_max = (MAX_HAVOC_ENTRY + 1) + (afl->extras_cnt ? 4 : 0) + - (afl->a_extras_cnt ? 4 : 0); + (afl->a_extras_cnt + ? (unlikely(afl->cmplog_binary && afl->queue_cur->is_ascii) + ? MUTATE_ASCII_DICT + : 4) + : 0); if (unlikely(afl->expand_havoc && afl->ready_for_splicing_count > 1)) { @@ -2592,7 +2597,15 @@ havoc_stage: if (afl->a_extras_cnt) { - if (r < 2) { + u32 r_cmp = 2; + + if (unlikely(afl->cmplog_binary && afl->queue_cur->is_ascii)) { + + r_cmp = MUTATE_ASCII_DICT >> 1; + + } + + if (r < r_cmp) { /* Use the dictionary. */ @@ -2612,7 +2625,7 @@ havoc_stage: break; - } else if (r < 4) { + } else if (r < (r_cmp << 1)) { u32 use_extra = rand_below(afl, afl->a_extras_cnt); u32 extra_len = afl->a_extras[use_extra].len; @@ -2641,7 +2654,7 @@ havoc_stage: } else { - r -= 4; + r -= (r_cmp << 1); } diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index 16af2c6b..718f7cb6 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -315,7 +315,96 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) { } -/* check if ascii or UTF-8 */ +/* check if pointer is ascii or UTF-8 */ + +u8 check_if_text_buf(u8 *buf, u32 len) { + + u32 offset = 0, ascii = 0, utf8 = 0; + + while (offset < len) { + + // ASCII: <= 0x7F to allow ASCII control characters + if ((buf[offset + 0] == 0x09 || buf[offset + 0] == 0x0A || + buf[offset + 0] == 0x0D || + (0x20 <= buf[offset + 0] && buf[offset + 0] <= 0x7E))) { + + offset++; + utf8++; + ascii++; + continue; + + } + + if (isascii((int)buf[offset]) || isprint((int)buf[offset])) { + + ascii++; + // we continue though as it can also be a valid utf8 + + } + + // non-overlong 2-byte + if (len - offset > 1 && + ((0xC2 <= buf[offset + 0] && buf[offset + 0] <= 0xDF) && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF))) { + + offset += 2; + utf8++; + continue; + + } + + // excluding overlongs + if ((len - offset > 2) && + ((buf[offset + 0] == 0xE0 && + (0xA0 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && + buf[offset + 2] <= 0xBF)) || // straight 3-byte + (((0xE1 <= buf[offset + 0] && buf[offset + 0] <= 0xEC) || + buf[offset + 0] == 0xEE || buf[offset + 0] == 0xEF) && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && + buf[offset + 2] <= 0xBF)) || // excluding surrogates + (buf[offset + 0] == 0xED && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x9F) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF)))) { + + offset += 3; + utf8++; + continue; + + } + + // planes 1-3 + if ((len - offset > 3) && + ((buf[offset + 0] == 0xF0 && + (0x90 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && + (0x80 <= buf[offset + 3] && + buf[offset + 3] <= 0xBF)) || // planes 4-15 + ((0xF1 <= buf[offset + 0] && buf[offset + 0] <= 0xF3) && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && + (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)) || // plane 16 + (buf[offset + 0] == 0xF4 && + (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x8F) && + (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && + (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)))) { + + offset += 4; + utf8++; + continue; + + } + + offset++; + + } + + return (utf8 > ascii ? utf8 : ascii); + +} + +/* check if queue entry is ascii or UTF-8 */ static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) { diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c index 870ba69a..7796036b 100644 --- a/src/afl-fuzz-stats.c +++ b/src/afl-fuzz-stats.c @@ -278,6 +278,7 @@ void write_stats_file(afl_state_t *afl, u32 t_bytes, double bitmap_cvg, "total_edges : %u\n" "var_byte_count : %u\n" "havoc_expansion : %u\n" + "auto_dict_entries : %u\n" "testcache_size : %llu\n" "testcache_count : %u\n" "testcache_evict : %u\n" @@ -316,7 +317,7 @@ void write_stats_file(afl_state_t *afl, u32 t_bytes, double bitmap_cvg, -1, #endif t_bytes, afl->fsrv.real_map_size, afl->var_byte_count, - afl->expand_havoc, afl->q_testcase_cache_size, + afl->expand_havoc, afl->a_extras_cnt, afl->q_testcase_cache_size, afl->q_testcase_cache_count, afl->q_testcase_evictions, afl->use_banner, afl->unicorn_mode ? "unicorn" : "", afl->fsrv.qemu_mode ? "qemu " : "", diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 92a37697..6ec033b2 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2261,6 +2261,20 @@ stop_fuzzing: } + if (afl->not_on_tty) { + + u32 t_bytes = count_non_255_bytes(afl, afl->virgin_bits); + u8 time_tmp[64]; + u_stringify_time_diff(time_tmp, get_cur_time(), afl->start_time); + ACTF( + "Statistics: %u new paths found, %.02f%% coverage achieved, %llu " + "crashes found, %llu timeouts found, total runtime %s", + afl->queued_discovered, + ((double)t_bytes * 100) / afl->fsrv.real_map_size, afl->unique_crashes, + afl->unique_hangs, time_tmp); + + } + #ifdef PROFILING SAYF(cYEL "[!] " cRST "Profiling information: %llu ms total work, %llu ns/run\n", -- cgit 1.4.1 From 2363a047500c133996e40808f1974e4aa97ed5bf Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 17 Oct 2021 14:21:08 +0200 Subject: simplify SHAPE_BYTES(h->shape) --- src/afl-fuzz-redqueen.c | 97 ++++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 46 deletions(-) (limited to 'src') diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 268f726c..2d610132 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -64,6 +64,8 @@ struct range { }; +static u32 hshape; + static struct range *add_range(struct range *ranges, u32 start, u32 end) { struct range *r = ck_alloc_nozero(sizeof(struct range)); @@ -763,7 +765,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, // "Encode: %llx->%llx into %llx(<-%llx) at idx=%u " // "taint_len=%u shape=%u attr=%u\n", // o_pattern, pattern, repl, changed_val, idx, taint_len, - // h->shape + 1, attr); + // hshape, attr); //#ifdef CMPLOG_SOLVE_TRANSFORM // reverse atoi()/strnu?toll() is expensive, so we only to it in lvl 3 @@ -845,7 +847,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, u64 b_val, o_b_val, mask; u8 bytes; - switch (SHAPE_BYTES(h->shape)) { + switch (hshape) { case 0: case 1: @@ -924,7 +926,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, s64 diff = pattern - b_val; s64 o_diff = o_pattern - o_b_val; /* fprintf(stderr, "DIFF1 idx=%03u shape=%02u %llx-%llx=%lx\n", idx, - h->shape + 1, o_pattern, o_b_val, o_diff); + hshape, o_pattern, o_b_val, o_diff); fprintf(stderr, "DIFF1 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff); */ if (diff == o_diff && diff) { @@ -953,7 +955,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, s64 o_diff = o_pattern ^ o_b_val; /* fprintf(stderr, "DIFF2 idx=%03u shape=%02u %llx-%llx=%lx\n", - idx, h->shape + 1, o_pattern, o_b_val, o_diff); + idx, hshape, o_pattern, o_b_val, o_diff); fprintf(stderr, "DIFF2 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff); */ @@ -1002,7 +1004,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } /* fprintf(stderr, "DIFF3 idx=%03u shape=%02u %llx-%llx=%lx\n", - idx, h->shape + 1, o_pattern, o_b_val, o_diff); + idx, hshape, o_pattern, o_b_val, o_diff); fprintf(stderr, "DIFF3 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff); */ @@ -1051,7 +1053,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } /* fprintf(stderr, "DIFF4 idx=%03u shape=%02u %llx-%llx=%lx\n", - idx, h->shape + 1, o_pattern, o_b_val, o_diff); + idx, hshape, o_pattern, o_b_val, o_diff); fprintf(stderr, "DIFF4 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff); */ @@ -1089,7 +1091,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, if ((lvl & LVL1) || attr >= IS_FP_MOD) { - if (SHAPE_BYTES(h->shape) >= 8 && *status != 1) { + if (hshape >= 8 && *status != 1) { // if (its_len >= 8) // fprintf(stderr, @@ -1132,7 +1134,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } - if (SHAPE_BYTES(h->shape) >= 4 && *status != 1) { + if (hshape >= 4 && *status != 1) { // if (its_len >= 4 && (attr <= 1 || attr >= 8)) // fprintf(stderr, @@ -1173,7 +1175,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } - if (SHAPE_BYTES(h->shape) >= 2 && *status != 1) { + if (hshape >= 2 && *status != 1) { if (its_len >= 2 && ((*buf_16 == (u16)pattern && *o_buf_16 == (u16)o_pattern) || @@ -1244,7 +1246,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } - if (!(attr & (IS_GREATER | IS_LESSER)) || SHAPE_BYTES(h->shape) < 4) { + if (!(attr & (IS_GREATER | IS_LESSER)) || hshape < 4) { return 0; @@ -1272,7 +1274,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, if (attr & IS_GREATER) { - if (SHAPE_BYTES(h->shape) == 4 && its_len >= 4) { + if (hshape == 4 && its_len >= 4) { float *f = (float *)&repl; float g = *f; @@ -1280,7 +1282,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, u32 *r = (u32 *)&g; repl_new = (u32)*r; - } else if (SHAPE_BYTES(h->shape) == 8 && its_len >= 8) { + } else if (hshape == 8 && its_len >= 8) { double *f = (double *)&repl; double g = *f; @@ -1307,7 +1309,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } else { - if (SHAPE_BYTES(h->shape) == 4) { + if (hshape == 4) { float *f = (float *)&repl; float g = *f; @@ -1315,7 +1317,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, u32 *r = (u32 *)&g; repl_new = (u32)*r; - } else if (SHAPE_BYTES(h->shape) == 8) { + } else if (hshape == 8) { double *f = (double *)&repl; double g = *f; @@ -1342,7 +1344,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } // transform double to float, llvm likes to do that internally ... - if (SHAPE_BYTES(h->shape) == 8 && its_len >= 4) { + if (hshape == 8 && its_len >= 4) { double *f = (double *)&repl; float g = (float)*f; @@ -1353,7 +1355,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, memcpy(((char *)&repl_new) + 4, (char *)&g, 4); #endif changed_val = repl_new; - h->shape = 3; // modify shape + hshape = 4; // modify shape // fprintf(stderr, "DOUBLE2FLOAT %llx\n", repl_new); @@ -1361,12 +1363,12 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, afl, h, pattern, repl_new, o_pattern, changed_val, 16, idx, taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) { - h->shape = 7; // recover shape + hshape = 8; // recover shape return 1; } - h->shape = 7; // recover shape + hshape = 7; // recover shape } @@ -1428,14 +1430,13 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h, u8 *r = (u8 *)&repl; u8 backup[16]; u32 its_len = MIN(len - idx, taint_len); - u32 shape = h->shape + 1; #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) size_t off = 0; #else - size_t off = 16 - shape; + size_t off = 16 - hshape; #endif - if (its_len >= shape) { + if (its_len >= hshape) { #ifdef _DEBUG fprintf(stderr, "TestUN: %u>=%u (len=%u idx=%u attr=%u off=%lu) (%u) ", @@ -1462,18 +1463,18 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h, fprintf(stderr, "\n"); #endif - if (!memcmp(ptr, p + off, shape) && !memcmp(o_ptr, o_p + off, shape)) { + if (!memcmp(ptr, p + off, hshape) && !memcmp(o_ptr, o_p + off, hshape)) { - memcpy(backup, ptr, shape); - memcpy(ptr, r + off, shape); + memcpy(backup, ptr, hshape); + memcpy(ptr, r + off, hshape); if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; } #ifdef CMPLOG_COMBINE - if (*status == 1) { memcpy(cbuf + idx, r, shape); } + if (*status == 1) { memcpy(cbuf + idx, r, hshape); } #endif - memcpy(ptr, backup, shape); + memcpy(ptr, backup, hshape); #ifdef _DEBUG fprintf(stderr, "Status=%u\n", *status); @@ -1485,8 +1486,8 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h, if (do_reverse && *status != 1) { if (unlikely(cmp_extend_encodingN( - afl, h, SWAPN(pattern, (shape << 3)), SWAPN(repl, (shape << 3)), - SWAPN(o_pattern, (shape << 3)), SWAPN(changed_val, (shape << 3)), + afl, h, SWAPN(pattern, (hshape << 3)), SWAPN(repl, (hshape << 3)), + SWAPN(o_pattern, (hshape << 3)), SWAPN(changed_val, (hshape << 3)), attr, idx, taint_len, orig_buf, buf, cbuf, len, 0, lvl, status))) { @@ -1615,6 +1616,8 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, u8 s_v0_inc = 1, s_v1_inc = 1; u8 s_v0_dec = 1, s_v1_dec = 1; + hshape = SHAPE_BYTES(h->shape); + if (h->hits > CMP_MAP_H) { loggeds = CMP_MAP_H; @@ -1626,7 +1629,7 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } #ifdef WORD_SIZE_64 - switch (SHAPE_BYTES(h->shape)) { + switch (hshape) { case 1: case 2: @@ -1680,7 +1683,7 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, #ifdef _DEBUG fprintf(stderr, "Handling: %llx->%llx vs %llx->%llx attr=%u shape=%u\n", orig_o->v0, o->v0, orig_o->v1, o->v1, h->attribute, - SHAPE_BYTES(h->shape)); + hshape); #endif t = taint; @@ -1830,7 +1833,7 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, "END: %llx->%llx vs %llx->%llx attr=%u i=%u found=%u " "isN=%u size=%u\n", orig_o->v0, o->v0, orig_o->v1, o->v1, h->attribute, i, found_one, - is_n, SHAPE_BYTES(h->shape)); + is_n, hshape); #endif // If failed, add to dictionary @@ -1841,16 +1844,16 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, #ifdef WORD_SIZE_64 if (unlikely(is_n)) { - try_to_add_to_dictN(afl, s128_v0, SHAPE_BYTES(h->shape)); - try_to_add_to_dictN(afl, s128_v1, SHAPE_BYTES(h->shape)); + try_to_add_to_dictN(afl, s128_v0, hshape); + try_to_add_to_dictN(afl, s128_v1, hshape); } else #endif { - try_to_add_to_dict(afl, o->v0, SHAPE_BYTES(h->shape)); - try_to_add_to_dict(afl, o->v1, SHAPE_BYTES(h->shape)); + try_to_add_to_dict(afl, o->v0, hshape); + try_to_add_to_dict(afl, o->v1, hshape); } @@ -2322,6 +2325,8 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, u32 i, j, idx, have_taint = 1, taint_len, loggeds; u8 status = 0, found_one = 0; + hshape = SHAPE_BYTES(h->shape); + if (h->hits > CMP_MAP_RTN_H) { loggeds = CMP_MAP_RTN_H; @@ -2355,12 +2360,12 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, /* struct cmp_header *hh = &afl->orig_cmp_map->headers[key]; fprintf(stderr, "RTN N hits=%u id=%u shape=%u attr=%u v0=", h->hits, - h->id, h->shape, h->attribute); + h->id, hshape, h->attribute); for (j = 0; j < 8; j++) fprintf(stderr, "%02x", o->v0[j]); fprintf(stderr, " v1="); for (j = 0; j < 8; j++) fprintf(stderr, "%02x", o->v1[j]); fprintf(stderr, "\nRTN O hits=%u id=%u shape=%u attr=%u o0=", - hh->hits, hh->id, hh->shape, hh->attribute); + hh->hits, hh->id, hshape, hh->attribute); for (j = 0; j < 8; j++) fprintf(stderr, "%02x", orig_o->v0[j]); fprintf(stderr, " o1="); for (j = 0; j < 8; j++) fprintf(stderr, "%02x", orig_o->v1[j]); @@ -2401,23 +2406,23 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, #ifdef _DEBUG int w; fprintf(stderr, "key=%u idx=%u len=%u o0=", key, idx, - SHAPE_BYTES(h->shape)); - for (w = 0; w < SHAPE_BYTES(h->shape); ++w) + hshape); + for (w = 0; w < hshape; ++w) fprintf(stderr, "%02x", orig_o->v0[w]); fprintf(stderr, " v0="); - for (w = 0; w < SHAPE_BYTES(h->shape); ++w) + for (w = 0; w < hshape; ++w) fprintf(stderr, "%02x", o->v0[w]); fprintf(stderr, " o1="); - for (w = 0; w < SHAPE_BYTES(h->shape); ++w) + for (w = 0; w < hshape; ++w) fprintf(stderr, "%02x", orig_o->v1[w]); fprintf(stderr, " v1="); - for (w = 0; w < SHAPE_BYTES(h->shape); ++w) + for (w = 0; w < hshape; ++w) fprintf(stderr, "%02x", o->v1[w]); fprintf(stderr, "\n"); #endif if (unlikely(rtn_extend_encoding( - afl, o->v0, o->v1, orig_o->v0, orig_o->v1, SHAPE_BYTES(h->shape), + afl, o->v0, o->v1, orig_o->v0, orig_o->v1, hshape, idx, taint_len, orig_buf, buf, cbuf, len, lvl, &status))) { return 1; @@ -2434,7 +2439,7 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, status = 0; if (unlikely(rtn_extend_encoding( - afl, o->v1, o->v0, orig_o->v1, orig_o->v0, SHAPE_BYTES(h->shape), + afl, o->v1, o->v0, orig_o->v1, orig_o->v0, hshape, idx, taint_len, orig_buf, buf, cbuf, len, lvl, &status))) { return 1; @@ -2455,8 +2460,8 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, if (unlikely(!afl->pass_stats[key].total)) { - maybe_add_auto(afl, o->v0, SHAPE_BYTES(h->shape)); - maybe_add_auto(afl, o->v1, SHAPE_BYTES(h->shape)); + maybe_add_auto(afl, o->v0, hshape); + maybe_add_auto(afl, o->v1, hshape); } -- cgit 1.4.1 From bf0fbc24ad32b0ec5421f4d2090c85d87aeceee3 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 17 Oct 2021 15:44:48 +0200 Subject: new cmplog add dict strategy --- src/afl-fuzz-redqueen.c | 170 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 131 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 2d610132..30de12c1 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -45,6 +45,23 @@ enum { }; +// add to dictionary enum +// DEFAULT = 1, notTXT = 2, FOUND = 4, notSAME = 8 +enum { + + DICT_ADD_NEVER = 0, + DICT_ADD_NOTFOUND_SAME_TXT = 1, + DICT_ADD_NOTFOUND_SAME = 3, + DICT_ADD_FOUND_SAME_TXT = 5, + DICT_ADD_FOUND_SAME = 7, + DICT_ADD_NOTFOUND_TXT = 9, + DICT_ADD_NOTFOUND = 11, + DICT_ADD_FOUND_TXT = 13, + DICT_ADD_FOUND = 15, + DICT_ADD_ANY = DICT_ADD_FOUND + +}; + // CMPLOG LVL enum { @@ -54,6 +71,8 @@ enum { }; +#define DICT_ADD_STRATEGY DICT_ADD_FOUND_SAME + struct range { u32 start; @@ -1246,11 +1265,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } - if (!(attr & (IS_GREATER | IS_LESSER)) || hshape < 4) { - - return 0; - - } + if (!(attr & (IS_GREATER | IS_LESSER)) || hshape < 4) { return 0; } // transform >= to < and <= to > if ((attr & IS_EQUAL) && (attr & (IS_GREATER | IS_LESSER))) { @@ -1487,9 +1502,9 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h, if (unlikely(cmp_extend_encodingN( afl, h, SWAPN(pattern, (hshape << 3)), SWAPN(repl, (hshape << 3)), - SWAPN(o_pattern, (hshape << 3)), SWAPN(changed_val, (hshape << 3)), - attr, idx, taint_len, orig_buf, buf, cbuf, len, 0, lvl, - status))) { + SWAPN(o_pattern, (hshape << 3)), + SWAPN(changed_val, (hshape << 3)), attr, idx, taint_len, orig_buf, + buf, cbuf, len, 0, lvl, status))) { return 1; @@ -1682,8 +1697,7 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, #ifdef _DEBUG fprintf(stderr, "Handling: %llx->%llx vs %llx->%llx attr=%u shape=%u\n", - orig_o->v0, o->v0, orig_o->v1, o->v1, h->attribute, - hshape); + orig_o->v0, o->v0, orig_o->v1, o->v1, h->attribute, hshape); #endif t = taint; @@ -1836,26 +1850,39 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, is_n, hshape); #endif - // If failed, add to dictionary - if (!found_one) { - - if (afl->pass_stats[key].total == 0) { + u8 same0 = 0, same1 = 0, result = 1 + 2 + (found_one << 2); + if (o->v0 != orig_o->v0) { same0 = 8; } + if (o->v1 != orig_o->v1) { same1 = 8; } #ifdef WORD_SIZE_64 - if (unlikely(is_n)) { + if (unlikely(is_n)) { - try_to_add_to_dictN(afl, s128_v0, hshape); - try_to_add_to_dictN(afl, s128_v1, hshape); + if (DICT_ADD_STRATEGY >= same0 + result) { - } else + try_to_add_to_dictN(afl, s128_v0, hshape); + + } + + if (DICT_ADD_STRATEGY >= same1 + result) { + + try_to_add_to_dictN(afl, s128_v1, hshape); + + } + + } else #endif - { + { - try_to_add_to_dict(afl, o->v0, hshape); - try_to_add_to_dict(afl, o->v1, hshape); + if (DICT_ADD_STRATEGY >= same0 + result) { - } + try_to_add_to_dict(afl, o->v0, hshape); + + } + + if (DICT_ADD_STRATEGY >= same1 + result) { + + try_to_add_to_dict(afl, o->v1, hshape); } @@ -1885,8 +1912,9 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } -static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl, - u8 *o_pattern, u8 *changed_val, u8 plen, u32 idx, +static u8 rtn_extend_encoding(afl_state_t *afl, u8 entry, + struct cmpfn_operands *o, + struct cmpfn_operands *orig_o, u32 idx, u32 taint_len, u8 *orig_buf, u8 *buf, u8 *cbuf, u32 len, u8 lvl, u8 *status) { @@ -1897,9 +1925,50 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl, // (void)(changed_val); //#endif + u8 *pattern, *repl, *o_pattern, *changed_val; + u8 l0, l1, ol0, ol1; + + if (entry == 0) { + + pattern = o->v0; + repl = o->v1; + o_pattern = orig_o->v0; + changed_val = orig_o->v1; + l0 = o->v0_len; + ol0 = orig_o->v0_len; + l1 = o->v1_len; + ol1 = orig_o->v1_len; + + } else { + + pattern = o->v1; + repl = o->v0; + o_pattern = orig_o->v1; + changed_val = orig_o->v0; + l0 = o->v1_len; + ol0 = orig_o->v1_len; + l1 = o->v0_len; + ol1 = orig_o->v0_len; + + } + + if (l0 >= 0x80) { + + l0 -= 0x80; + l1 -= 0x80; + ol0 -= 0x80; + ol1 -= 0x80; + + } else if (l0 == 0 || l1 == 0 || ol0 == 0 || ol1 == 0) { + + l0 = l1 = ol0 = ol1 = hshape; + + } + + u8 lmax = MAX(l0, ol0); u8 save[40]; u32 saved_idx = idx, pre, from = 0, to = 0, i, j; - u32 its_len = MIN((u32)plen, len - idx); + u32 its_len = MIN(MIN(lmax, hshape), len - idx); its_len = MIN(its_len, taint_len); u32 saved_its_len = its_len; @@ -1915,7 +1984,8 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl, (void)(j); #ifdef _DEBUG - fprintf(stderr, "RTN T idx=%u lvl=%02x ", idx, lvl); + fprintf(stderr, "RTN T idx=%u lvl=%02x is_txt=%u shape=%u/%u ", idx, lvl, + o->v0_len >= 0x80 ? 1 : 0, hshape, l0); for (j = 0; j < 8; j++) fprintf(stderr, "%02x", orig_buf[idx + j]); fprintf(stderr, " -> "); @@ -2405,8 +2475,7 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, #ifdef _DEBUG int w; - fprintf(stderr, "key=%u idx=%u len=%u o0=", key, idx, - hshape); + fprintf(stderr, "key=%u idx=%u len=%u o0=", key, idx, hshape); for (w = 0; w < hshape; ++w) fprintf(stderr, "%02x", orig_o->v0[w]); fprintf(stderr, " v0="); @@ -2421,9 +2490,9 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, fprintf(stderr, "\n"); #endif - if (unlikely(rtn_extend_encoding( - afl, o->v0, o->v1, orig_o->v0, orig_o->v1, hshape, - idx, taint_len, orig_buf, buf, cbuf, len, lvl, &status))) { + if (unlikely(rtn_extend_encoding(afl, 0, o, orig_o, idx, taint_len, + orig_buf, buf, cbuf, len, lvl, + &status))) { return 1; @@ -2438,9 +2507,9 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, status = 0; - if (unlikely(rtn_extend_encoding( - afl, o->v1, o->v0, orig_o->v1, orig_o->v0, hshape, - idx, taint_len, orig_buf, buf, cbuf, len, lvl, &status))) { + if (unlikely(rtn_extend_encoding(afl, 1, o, orig_o, idx, taint_len, + orig_buf, buf, cbuf, len, lvl, + &status))) { return 1; @@ -2455,13 +2524,36 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } - // If failed, add to dictionary - if (!found_one && (lvl & LVL1)) { + // if (unlikely(!afl->pass_stats[key].total)) { + + if (lvl & LVL1) { + + u8 is_txt = 0, l0 = o->v0_len, ol0 = orig_o->v0_len, l1 = o->v1_len, + ol1 = orig_o->v1_len; + if (l0 >= 0x80) { + + is_txt = 1; + l0 -= 0x80; + l1 -= 0x80; + ol0 -= 0x80; + ol1 -= 0x80; + + } + + u8 same0 = 0, same1 = 0, result = 1 + (found_one << 2); + if (!is_txt) result += 2; + if (l0 != ol0 || memcmp(o->v0, orig_o->v0, l0) != 0) { same0 = 8; } + if (l1 != ol1 || memcmp(o->v1, orig_o->v1, l1) != 0) { same1 = 8; } + + if (DICT_ADD_STRATEGY >= same0 + result) { + + maybe_add_auto(afl, o->v0, l0); + + } - if (unlikely(!afl->pass_stats[key].total)) { + if (DICT_ADD_STRATEGY >= same1 + result) { - maybe_add_auto(afl, o->v0, hshape); - maybe_add_auto(afl, o->v1, hshape); + maybe_add_auto(afl, o->v1, l1); } -- cgit 1.4.1 From 462149de642451024199557c7a7bb7ea76e5b8fa Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 17 Oct 2021 20:31:02 +0200 Subject: fix --- instrumentation/afl-compiler-rt.o.c | 18 +------- src/afl-fuzz-redqueen.c | 87 ++++++++++++++++++++++--------------- 2 files changed, 53 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 21772ca0..38beafb7 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -1944,15 +1944,8 @@ void __cmplog_rtn_hook_n(u8 *ptr1, u8 *ptr2, u32 len) { void __cmplog_rtn_hook_strn(u8 *ptr1, u8 *ptr2, u32 len) { /* - u32 i; if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return; - fprintf(stderr, "rtn_strn len=%u arg0=", len); - for (i = 0; i < len; i++) - fprintf(stderr, "%02x", ptr1[i]); - fprintf(stderr, " arg1="); - for (i = 0; i < len; i++) - fprintf(stderr, "%02x", ptr2[i]); - fprintf(stderr, "\n"); + fprintf(stderr, "rtn_strn len=%u arg0=%s arg1=%s\n", len, ptr1, ptr2); */ if (likely(!__afl_cmp_map)) return; @@ -2005,15 +1998,8 @@ void __cmplog_rtn_hook_strn(u8 *ptr1, u8 *ptr2, u32 len) { void __cmplog_rtn_hook_str(u8 *ptr1, u8 *ptr2) { /* - u32 i; if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return; - fprintf(stderr, "rtn_str arg0="); - for (i = 0; i < len; i++) - fprintf(stderr, "%02x", ptr1[i]); - fprintf(stderr, " arg1="); - for (i = 0; i < len; i++) - fprintf(stderr, "%02x", ptr2[i]); - fprintf(stderr, "\n"); + fprintf(stderr, "rtn_str arg0=%s arg1=%s\n", ptr1, ptr2); */ if (likely(!__afl_cmp_map)) return; diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 30de12c1..65d21b0a 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -1455,25 +1455,25 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h, #ifdef _DEBUG fprintf(stderr, "TestUN: %u>=%u (len=%u idx=%u attr=%u off=%lu) (%u) ", - its_len, shape, len, idx, attr, off, do_reverse); + its_len, hshape, len, idx, attr, off, do_reverse); u32 i; u8 *o_r = (u8 *)&changed_val; - for (i = 0; i < shape; i++) + for (i = 0; i < hshape; i++) fprintf(stderr, "%02x", ptr[i]); fprintf(stderr, "=="); - for (i = 0; i < shape; i++) + for (i = 0; i < hshape; i++) fprintf(stderr, "%02x", p[off + i]); fprintf(stderr, " "); - for (i = 0; i < shape; i++) + for (i = 0; i < hshape; i++) fprintf(stderr, "%02x", o_ptr[i]); fprintf(stderr, "=="); - for (i = 0; i < shape; i++) + for (i = 0; i < hshape; i++) fprintf(stderr, "%02x", o_p[off + i]); fprintf(stderr, " <= "); - for (i = 0; i < shape; i++) + for (i = 0; i < hshape; i++) fprintf(stderr, "%02x", r[off + i]); fprintf(stderr, "<-"); - for (i = 0; i < shape; i++) + for (i = 0; i < hshape; i++) fprintf(stderr, "%02x", o_r[off + i]); fprintf(stderr, "\n"); #endif @@ -1850,39 +1850,46 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, is_n, hshape); #endif - u8 same0 = 0, same1 = 0, result = 1 + 2 + (found_one << 2); - if (o->v0 != orig_o->v0) { same0 = 8; } - if (o->v1 != orig_o->v1) { same1 = 8; } + // we only learn 16 bit + + if (hshape > 1) { + + u8 same0 = 0, same1 = 0, result = 1 + 2 + (found_one << 2); + if (o->v0 != orig_o->v0) { same0 = 8; } + if (o->v1 != orig_o->v1) { same1 = 8; } #ifdef WORD_SIZE_64 - if (unlikely(is_n)) { + if (unlikely(is_n)) { - if (DICT_ADD_STRATEGY >= same0 + result) { + if (DICT_ADD_STRATEGY >= same0 + result) { - try_to_add_to_dictN(afl, s128_v0, hshape); + try_to_add_to_dictN(afl, s128_v0, hshape); - } + } - if (DICT_ADD_STRATEGY >= same1 + result) { + if (DICT_ADD_STRATEGY >= same1 + result) { - try_to_add_to_dictN(afl, s128_v1, hshape); + try_to_add_to_dictN(afl, s128_v1, hshape); - } + } - } else + } else #endif - { + { - if (DICT_ADD_STRATEGY >= same0 + result) { + if (DICT_ADD_STRATEGY >= same0 + result) { - try_to_add_to_dict(afl, o->v0, hshape); + // fprintf(stderr, "add v0 0x%llx\n", o->v0); + try_to_add_to_dict(afl, o->v0, hshape); - } + } - if (DICT_ADD_STRATEGY >= same1 + result) { + if (DICT_ADD_STRATEGY >= same1 + result) { - try_to_add_to_dict(afl, o->v1, hshape); + // fprintf(stderr, "add v1 0x%llx\n", o->v1); + try_to_add_to_dict(afl, o->v1, hshape); + + } } @@ -2428,18 +2435,22 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } /* - struct cmp_header *hh = &afl->orig_cmp_map->headers[key]; - fprintf(stderr, "RTN N hits=%u id=%u shape=%u attr=%u v0=", h->hits, - h->id, hshape, h->attribute); - for (j = 0; j < 8; j++) fprintf(stderr, "%02x", o->v0[j]); - fprintf(stderr, " v1="); - for (j = 0; j < 8; j++) fprintf(stderr, "%02x", o->v1[j]); - fprintf(stderr, "\nRTN O hits=%u id=%u shape=%u attr=%u o0=", - hh->hits, hh->id, hshape, hh->attribute); - for (j = 0; j < 8; j++) fprintf(stderr, "%02x", orig_o->v0[j]); - fprintf(stderr, " o1="); - for (j = 0; j < 8; j++) fprintf(stderr, "%02x", orig_o->v1[j]); - fprintf(stderr, "\n"); + struct cmp_header *hh = &afl->orig_cmp_map->headers[key]; + fprintf(stderr, "RTN N hits=%u id=%u shape=%u attr=%u v0=", h->hits, h->id, + hshape, h->attribute); + for (j = 0; j < 8; j++) + fprintf(stderr, "%02x", o->v0[j]); + fprintf(stderr, " v1="); + for (j = 0; j < 8; j++) + fprintf(stderr, "%02x", o->v1[j]); + fprintf(stderr, "\nRTN O hits=%u id=%u shape=%u attr=%u o0=", hh->hits, + hh->id, hshape, hh->attribute); + for (j = 0; j < 8; j++) + fprintf(stderr, "%02x", orig_o->v0[j]); + fprintf(stderr, " o1="); + for (j = 0; j < 8; j++) + fprintf(stderr, "%02x", orig_o->v1[j]); + fprintf(stderr, "\n"); */ t = taint; @@ -2547,12 +2558,14 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, if (DICT_ADD_STRATEGY >= same0 + result) { + // fprintf(stderr, "add v0 [%u]\"%s\"\n", l0, o->v0); maybe_add_auto(afl, o->v0, l0); } if (DICT_ADD_STRATEGY >= same1 + result) { + // fprintf(stderr, "add v1 [%u]\"%s\"\n", l1, o->v1); maybe_add_auto(afl, o->v1, l1); } @@ -2727,11 +2740,13 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) { if (afl->shm.cmp_map->headers[k].type == CMP_TYPE_INS) { + // fprintf(stderr, "INS %u\n", k); afl->stage_max += MIN((u32)(afl->shm.cmp_map->headers[k].hits), (u32)CMP_MAP_H); } else { + // fprintf(stderr, "RTN %u\n", k); afl->stage_max += MIN((u32)(afl->shm.cmp_map->headers[k].hits), (u32)CMP_MAP_RTN_H); -- cgit 1.4.1 From 72d10fee407f32d4041573d1906a047a67277eff Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 18 Oct 2021 10:03:39 +0200 Subject: dict enhancement --- src/afl-fuzz-redqueen.c | 64 ++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 65d21b0a..10bcd63d 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -1853,41 +1853,48 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, // we only learn 16 bit + if (hshape > 1) { - u8 same0 = 0, same1 = 0, result = 1 + 2 + (found_one << 2); + u8 same0 = 0, same1 = 0, same2 = 0, same3 = 0, + result = 1 + 2 + (found_one << 2); if (o->v0 != orig_o->v0) { same0 = 8; } if (o->v1 != orig_o->v1) { same1 = 8; } + if (o->v0 != o->v1) { same2 = 8; } + if (orig_o->v0 != orig_o->v1) { same3 = 8; } + + if (!(same0 && same1) && !same2 && !same3) { #ifdef WORD_SIZE_64 - if (unlikely(is_n)) { + if (unlikely(is_n)) { - if (DICT_ADD_STRATEGY >= same0 + result) { + if (DICT_ADD_STRATEGY >= same0 + result) { - try_to_add_to_dictN(afl, s128_v0, hshape); + try_to_add_to_dictN(afl, s128_v0, hshape); - } + } - if (DICT_ADD_STRATEGY >= same1 + result) { + if (DICT_ADD_STRATEGY >= same1 + result) { - try_to_add_to_dictN(afl, s128_v1, hshape); + try_to_add_to_dictN(afl, s128_v1, hshape); - } + } - } else + } else #endif - { + { - if (DICT_ADD_STRATEGY >= same0 + result) { + if (DICT_ADD_STRATEGY >= same0 + result) { - // fprintf(stderr, "add v0 0x%llx\n", o->v0); - try_to_add_to_dict(afl, o->v0, hshape); + // fprintf(stderr, "add v0 0x%llx\n", o->v0); + try_to_add_to_dict(afl, o->v0, hshape); - } + } - if (DICT_ADD_STRATEGY >= same1 + result) { + if (DICT_ADD_STRATEGY >= same1 + result) { + + // fprintf(stderr, "add v1 0x%llx\n", o->v1); + try_to_add_to_dict(afl, o->v1, hshape); - // fprintf(stderr, "add v1 0x%llx\n", o->v1); - try_to_add_to_dict(afl, o->v1, hshape); + } } @@ -2551,22 +2558,29 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } - u8 same0 = 0, same1 = 0, result = 1 + (found_one << 2); + u8 same0 = 0, same1 = 0, same2 = 0, same3 = 0, + result = 1 + (found_one << 2); if (!is_txt) result += 2; if (l0 != ol0 || memcmp(o->v0, orig_o->v0, l0) != 0) { same0 = 8; } if (l1 != ol1 || memcmp(o->v1, orig_o->v1, l1) != 0) { same1 = 8; } + if (l0 != l1 || memcmp(o->v0, o->v1, l0) != 0) { same2 = 8; } + if (ol0 != ol1 || memcmp(orig_o->v0, orig_o->v1, l0) != 0) { same3 = 8; } - if (DICT_ADD_STRATEGY >= same0 + result) { + if (!(same0 && same1) && !same2 && !same3) { - // fprintf(stderr, "add v0 [%u]\"%s\"\n", l0, o->v0); - maybe_add_auto(afl, o->v0, l0); + if (DICT_ADD_STRATEGY >= same0 + result) { - } + // fprintf(stderr, "add v0 [%u]\"%s\"\n", l0, o->v0); + maybe_add_auto(afl, o->v0, l0); - if (DICT_ADD_STRATEGY >= same1 + result) { + } + + if (DICT_ADD_STRATEGY >= same1 + result) { - // fprintf(stderr, "add v1 [%u]\"%s\"\n", l1, o->v1); - maybe_add_auto(afl, o->v1, l1); + // fprintf(stderr, "add v1 [%u]\"%s\"\n", l1, o->v1); + maybe_add_auto(afl, o->v1, l1); + + } } -- cgit 1.4.1 From 699df8f8ce4cc5de56510f72ebff611d26710557 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Mon, 18 Oct 2021 11:11:56 +0200 Subject: fix --- src/afl-fuzz-redqueen.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 10bcd63d..30bad557 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -1860,18 +1860,18 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, if (o->v0 != o->v1) { same2 = 8; } if (orig_o->v0 != orig_o->v1) { same3 = 8; } - if (!(same0 && same1) && !same2 && !same3) { + if (!same2 && !same3) { #ifdef WORD_SIZE_64 if (unlikely(is_n)) { - if (DICT_ADD_STRATEGY >= same0 + result) { + if (!(!same0 && same1) && DICT_ADD_STRATEGY >= same0 + result) { try_to_add_to_dictN(afl, s128_v0, hshape); } - if (DICT_ADD_STRATEGY >= same1 + result) { + if (!(same0 && !same1) && DICT_ADD_STRATEGY >= same1 + result) { try_to_add_to_dictN(afl, s128_v1, hshape); @@ -2566,16 +2566,16 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, if (l0 != l1 || memcmp(o->v0, o->v1, l0) != 0) { same2 = 8; } if (ol0 != ol1 || memcmp(orig_o->v0, orig_o->v1, l0) != 0) { same3 = 8; } - if (!(same0 && same1) && !same2 && !same3) { + if (!same2 && !same3) { - if (DICT_ADD_STRATEGY >= same0 + result) { + if (!(same0 && !same1) && DICT_ADD_STRATEGY >= same0 + result) { // fprintf(stderr, "add v0 [%u]\"%s\"\n", l0, o->v0); maybe_add_auto(afl, o->v0, l0); } - if (DICT_ADD_STRATEGY >= same1 + result) { + if (!(!same0 && same1) && DICT_ADD_STRATEGY >= same1 + result) { // fprintf(stderr, "add v1 [%u]\"%s\"\n", l1, o->v1); maybe_add_auto(afl, o->v1, l1); -- cgit 1.4.1 From b4e328803c4e61c81fdf0e0412f013fcf0e72e0d Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 26 Oct 2021 18:10:57 +0200 Subject: variants --- include/cmplog.h | 2 +- src/afl-fuzz-redqueen.c | 93 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 70 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/include/cmplog.h b/include/cmplog.h index 88aa0a61..b95a8e43 100644 --- a/include/cmplog.h +++ b/include/cmplog.h @@ -33,7 +33,7 @@ #define CMPLOG_LVL_MAX 3 #define CMP_MAP_W 65536 -#define CMP_MAP_H 64 +#define CMP_MAP_H 32 #define CMP_MAP_RTN_H (CMP_MAP_H / 4) #define SHAPE_BYTES(x) (x + 1) diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 30bad557..6002703e 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -28,6 +28,8 @@ #include "afl-fuzz.h" #include "cmplog.h" +#define VARIANT 0 + //#define _DEBUG //#define CMPLOG_INTROSPECTION @@ -461,10 +463,10 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len, fprintf( f, "Colorization: fname=%s len=%u ms=%llu result=%u execs=%u found=%llu " - "taint=%u\n", + "taint=%u ascii=%u auto_extra_before=%u\n", afl->queue_cur->fname, len, get_cur_time() - start_time, afl->queue_cur->colorized, afl->stage_cur, new_hit_cnt - orig_hit_cnt, - positions); + positions, afl->queue_cur->is_ascii ? 1 : 0, afl->a_extras_cnt); #ifndef _DEBUG if (afl->not_on_tty) { fclose(f); } @@ -1854,24 +1856,34 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, if (hshape > 1) { u8 same0 = 0, same1 = 0, same2 = 0, same3 = 0, - result = 1 + 2 + (found_one << 2); + result = 1 + (found_one << 2); if (o->v0 != orig_o->v0) { same0 = 8; } if (o->v1 != orig_o->v1) { same1 = 8; } if (o->v0 != o->v1) { same2 = 8; } if (orig_o->v0 != orig_o->v1) { same3 = 8; } + if (check_if_text_buf((u8 *)o->v0, hshape) < hshape) same0 += 2; + if (check_if_text_buf((u8 *)o->v1, hshape) < hshape) same1 += 2; if (!same2 && !same3) { #ifdef WORD_SIZE_64 if (unlikely(is_n)) { - if (!(!same0 && same1) && DICT_ADD_STRATEGY >= same0 + result) { + if ( + #if VARIANT == 1 + !(!same0 && same1) && + #endif + DICT_ADD_STRATEGY >= same0 + result) { try_to_add_to_dictN(afl, s128_v0, hshape); } - if (!(same0 && !same1) && DICT_ADD_STRATEGY >= same1 + result) { + if ( + #if VARIANT == 1 + !(same0 && !same1) && + #endif + DICT_ADD_STRATEGY >= same1 + result) { try_to_add_to_dictN(afl, s128_v1, hshape); @@ -1882,14 +1894,22 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, #endif { - if (DICT_ADD_STRATEGY >= same0 + result) { + if ( +#if VARIANT == 1 + !(!same0 && same1) && +#endif + DICT_ADD_STRATEGY >= same0 + result) { // fprintf(stderr, "add v0 0x%llx\n", o->v0); try_to_add_to_dict(afl, o->v0, hshape); } - if (DICT_ADD_STRATEGY >= same1 + result) { + if ( +#if VARIANT == 1 + !(same0 && !same1) && +#endif + DICT_ADD_STRATEGY >= same1 + result) { // fprintf(stderr, "add v1 0x%llx\n", o->v1); try_to_add_to_dict(afl, o->v1, hshape); @@ -1966,14 +1986,17 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 entry, } - if (l0 >= 0x80) { + if (l0 >= 0x80 || ol0 >= 0x80) { l0 -= 0x80; l1 -= 0x80; ol0 -= 0x80; ol1 -= 0x80; - } else if (l0 == 0 || l1 == 0 || ol0 == 0 || ol1 == 0) { + } + + if (l0 == 0 || l1 == 0 || ol0 == 0 || ol1 == 0 || l0 > 31 || l1 > 31 || + ol0 > 31 || ol1 > 31) { l0 = l1 = ol0 = ol1 = hshape; @@ -2059,10 +2082,10 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 entry, } - //#ifdef CMPLOG_SOLVE_TRANSFORM - if (*status == 1) return 0; + // transform solving + if (afl->cmplog_enable_transform && (lvl & LVL3)) { u32 toupper = 0, tolower = 0, xor = 0, arith = 0, tohex = 0, fromhex = 0; @@ -2548,7 +2571,7 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, u8 is_txt = 0, l0 = o->v0_len, ol0 = orig_o->v0_len, l1 = o->v1_len, ol1 = orig_o->v1_len; - if (l0 >= 0x80) { + if (l0 >= 0x80 || ol0 >= 0x80) { is_txt = 1; l0 -= 0x80; @@ -2558,29 +2581,51 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } + if (l0 == 0 || l1 == 0 || ol0 == 0 || ol1 == 0 || l0 > 31 || l1 > 31 || + ol0 > 31 || ol1 > 31) { + + l0 = l1 = ol0 = ol1 = hshape; + + } + u8 same0 = 0, same1 = 0, same2 = 0, same3 = 0, result = 1 + (found_one << 2); - if (!is_txt) result += 2; if (l0 != ol0 || memcmp(o->v0, orig_o->v0, l0) != 0) { same0 = 8; } if (l1 != ol1 || memcmp(o->v1, orig_o->v1, l1) != 0) { same1 = 8; } if (l0 != l1 || memcmp(o->v0, o->v1, l0) != 0) { same2 = 8; } if (ol0 != ol1 || memcmp(orig_o->v0, orig_o->v1, l0) != 0) { same3 = 8; } + if (!is_txt && check_if_text_buf((u8 *)&o->v0, l0) < l0) { same0 += 2; } + if (!is_txt && check_if_text_buf((u8 *)&o->v1, l1) < l1) { same1 += 2; } - if (!same2 && !same3) { - - if (!(same0 && !same1) && DICT_ADD_STRATEGY >= same0 + result) { + if ( +#if VARIANT < 3 + !(same0 && !same1) && +#endif +#if VARIANT == 4 + (!same2 && same3) +#else + (DICT_ADD_STRATEGY >= same0 + result) +#endif + ) { - // fprintf(stderr, "add v0 [%u]\"%s\"\n", l0, o->v0); - maybe_add_auto(afl, o->v0, l0); + // fprintf(stderr, "add v0 [%u]\"%s\"\n", l0, o->v0); + maybe_add_auto(afl, o->v0, l0); - } - - if (!(!same0 && same1) && DICT_ADD_STRATEGY >= same1 + result) { + } - // fprintf(stderr, "add v1 [%u]\"%s\"\n", l1, o->v1); - maybe_add_auto(afl, o->v1, l1); + if ( +#if VARIANT < 3 + !(!same0 && same1) && +#endif +#if VARIANT == 4 + (same2 && !same3) +#else + (DICT_ADD_STRATEGY >= same1 + result) +#endif + ) { - } + // fprintf(stderr, "add v1 [%u]\"%s\"\n", l1, o->v1); + maybe_add_auto(afl, o->v1, l1); } -- cgit 1.4.1 From 245e91eb27f1572e00c6664477d588c540c30ed6 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 26 Oct 2021 21:30:08 +0200 Subject: fix --- src/afl-fuzz-redqueen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 6002703e..800a0753 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -28,7 +28,7 @@ #include "afl-fuzz.h" #include "cmplog.h" -#define VARIANT 0 +#define VARIANT 4 //#define _DEBUG //#define CMPLOG_INTROSPECTION @@ -1861,8 +1861,8 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, if (o->v1 != orig_o->v1) { same1 = 8; } if (o->v0 != o->v1) { same2 = 8; } if (orig_o->v0 != orig_o->v1) { same3 = 8; } - if (check_if_text_buf((u8 *)o->v0, hshape) < hshape) same0 += 2; - if (check_if_text_buf((u8 *)o->v1, hshape) < hshape) same1 += 2; + if (check_if_text_buf((u8 *)&o->v0, hshape) < hshape) same0 += 2; + if (check_if_text_buf((u8 *)&o->v1, hshape) < hshape) same1 += 2; if (!same2 && !same3) { -- cgit 1.4.1 From 9a11c8ac24f8b2c2dcd78aa3a27e5416a717ce4f Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Tue, 26 Oct 2021 22:06:01 +0200 Subject: fix --- src/afl-fuzz-redqueen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 800a0753..928cab59 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -28,7 +28,7 @@ #include "afl-fuzz.h" #include "cmplog.h" -#define VARIANT 4 +#define VARIANT 0 //#define _DEBUG //#define CMPLOG_INTROSPECTION -- cgit 1.4.1 From e2ca7a63972ef3c245395bea7bb89d873ad757af Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Thu, 28 Oct 2021 18:02:57 +0200 Subject: variant 6 --- instrumentation/afl-compiler-rt.o.c | 44 ++++++------- src/afl-fuzz-redqueen.c | 126 +++++++++++++++++++++++++++++------- 2 files changed, 125 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 5caf57b3..17724e74 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -22,6 +22,10 @@ #include "cmplog.h" #include "llvm-alternative-coverage.h" +#define XXH_INLINE_ALL +#include "xxhash.h" +#undef XXH_INLINE_ALL + #include #include #include @@ -154,6 +158,12 @@ static void at_exit(int signal) { } +#ifdef WORD_SIZE_64 + #define default_hash(a, b) XXH64(a, b, HASH_CONST) +#else + #define default_hash(a, b) XXH64(a, b, HASH_CONST) +#endif + /* Uninspired gcc plugin instrumentation */ void __afl_trace(const u32 x) { @@ -1499,8 +1509,7 @@ void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2, uint8_t attr) { if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1530,8 +1539,7 @@ void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2, uint8_t attr) { if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1569,8 +1577,7 @@ void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2, uint8_t attr) { if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1608,8 +1615,7 @@ void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2, uint8_t attr) { if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1652,8 +1658,7 @@ void __cmplog_ins_hookN(uint128_t arg1, uint128_t arg2, uint8_t attr, if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1696,8 +1701,7 @@ void __cmplog_ins_hook16(uint128_t arg1, uint128_t arg2, uint8_t attr) { if (likely(!__afl_cmp_map)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1802,8 +1806,8 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) { for (uint64_t i = 0; i < cases[0]; i++) { uintptr_t k = (uintptr_t)__builtin_return_address(0) + i; - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & + (CMP_MAP_W - 1)); u32 hits; @@ -1901,8 +1905,7 @@ void __cmplog_rtn_hook_n(u8 *ptr1, u8 *ptr2, u64 len) { // fprintf(stderr, "RTN2 %u\n", l); uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits, reset = 1; @@ -1955,8 +1958,7 @@ void __cmplog_rtn_hook_strn(u8 *ptr1, u8 *ptr2, u64 len) { // fprintf(stderr, "RTN2 %u\n", l); uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits, reset = 1; @@ -2011,8 +2013,7 @@ void __cmplog_rtn_hook_str(u8 *ptr1, u8 *ptr2) { // fprintf(stderr, "RTN2 %u\n", l); uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits, reset = 1; @@ -2075,8 +2076,7 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { // fprintf(stderr, "RTN2 %u\n", len); uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits, reset = 1; diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index 928cab59..b5dc23c2 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -28,7 +28,7 @@ #include "afl-fuzz.h" #include "cmplog.h" -#define VARIANT 0 +#define VARIANT 6 //#define _DEBUG //#define CMPLOG_INTROSPECTION @@ -1855,6 +1855,43 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, // we only learn 16 bit + if (hshape > 1) { +#if VARIANT == 6 + if (!found_one || afl->queue_cur->is_ascii) { + + #ifdef WORD_SIZE_64 + if (unlikely(is_n)) { + + if (!found_one || + check_if_text_buf((u8 *)&s128_v0, SHAPE_BYTES(h->shape)) == + SHAPE_BYTES(h->shape)) + try_to_add_to_dictN(afl, s128_v0, SHAPE_BYTES(h->shape)); + if (!found_one || + check_if_text_buf((u8 *)&s128_v1, SHAPE_BYTES(h->shape)) == + SHAPE_BYTES(h->shape)) + try_to_add_to_dictN(afl, s128_v1, SHAPE_BYTES(h->shape)); + + } else + + #endif + { + + if (!memcmp((u8 *)&o->v0, (u8 *)&orig_o->v0, SHAPE_BYTES(h->shape)) && + (!found_one || + check_if_text_buf((u8 *)&o->v0, SHAPE_BYTES(h->shape)) == + SHAPE_BYTES(h->shape))) + try_to_add_to_dict(afl, o->v0, SHAPE_BYTES(h->shape)); + if (!memcmp((u8 *)&o->v1, (u8 *)&orig_o->v1, SHAPE_BYTES(h->shape)) && + (!found_one || + check_if_text_buf((u8 *)&o->v1, SHAPE_BYTES(h->shape)) == + SHAPE_BYTES(h->shape))) + try_to_add_to_dict(afl, o->v1, SHAPE_BYTES(h->shape)); + + } + + } + +#else + u8 same0 = 0, same1 = 0, same2 = 0, same3 = 0, result = 1 + (found_one << 2); if (o->v0 != orig_o->v0) { same0 = 8; } @@ -1866,13 +1903,13 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, if (!same2 && !same3) { -#ifdef WORD_SIZE_64 + #ifdef WORD_SIZE_64 if (unlikely(is_n)) { if ( - #if VARIANT == 1 + #if VARIANT == 1 !(!same0 && same1) && - #endif + #endif DICT_ADD_STRATEGY >= same0 + result) { try_to_add_to_dictN(afl, s128_v0, hshape); @@ -1880,9 +1917,9 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } if ( - #if VARIANT == 1 + #if VARIANT == 1 !(same0 && !same1) && - #endif + #endif DICT_ADD_STRATEGY >= same1 + result) { try_to_add_to_dictN(afl, s128_v1, hshape); @@ -1891,13 +1928,13 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } else -#endif + #endif { if ( -#if VARIANT == 1 + #if VARIANT == 1 !(!same0 && same1) && -#endif + #endif DICT_ADD_STRATEGY >= same0 + result) { // fprintf(stderr, "add v0 0x%llx\n", o->v0); @@ -1906,9 +1943,9 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } if ( -#if VARIANT == 1 + #if VARIANT == 1 !(same0 && !same1) && -#endif + #endif DICT_ADD_STRATEGY >= same1 + result) { // fprintf(stderr, "add v1 0x%llx\n", o->v1); @@ -1920,6 +1957,8 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } +#endif + } cmp_fuzz_next_iter: @@ -2567,6 +2606,44 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, // if (unlikely(!afl->pass_stats[key].total)) { +#if VARIANT == 6 + if ((!found_one && (lvl & LVL1)) || afl->queue_cur->is_ascii) { + + // if (unlikely(!afl->pass_stats[key].total)) { + + u32 shape_len = SHAPE_BYTES(h->shape); + u32 v0_len = shape_len, v1_len = shape_len; + if (afl->queue_cur->is_ascii || + check_if_text_buf((u8 *)&o->v0, shape_len) == shape_len) { + + if (strlen(o->v0)) v0_len = strlen(o->v0); + + } + + if (afl->queue_cur->is_ascii || + check_if_text_buf((u8 *)&o->v1, shape_len) == shape_len) { + + if (strlen(o->v1)) v1_len = strlen(o->v1); + + } + + // fprintf(stderr, "SHOULD: found:%u ascii:%u text?%u:%u %u:%s %u:%s \n", + // found_one, afl->queue_cur->is_ascii, check_if_text_buf((u8 *)&o->v0, + // shape_len), check_if_text_buf((u8 *)&o->v1, shape_len), v0_len, + // o->v0, v1_len, o->v1); + + if (!memcmp(o->v0, orig_o->v0, v0_len) || + (!found_one || check_if_text_buf((u8 *)&o->v0, v0_len) == v0_len)) + maybe_add_auto(afl, o->v0, v0_len); + if (!memcmp(o->v1, orig_o->v1, v1_len) || + (!found_one || check_if_text_buf((u8 *)&o->v1, v1_len) == v1_len)) + maybe_add_auto(afl, o->v1, v1_len); + + //} + + } + +#else if (lvl & LVL1) { u8 is_txt = 0, l0 = o->v0_len, ol0 = orig_o->v0_len, l1 = o->v1_len, @@ -2598,14 +2675,14 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, if (!is_txt && check_if_text_buf((u8 *)&o->v1, l1) < l1) { same1 += 2; } if ( -#if VARIANT < 3 + #if VARIANT < 3 !(same0 && !same1) && -#endif -#if VARIANT == 4 + #endif + #if VARIANT == 4 (!same2 && same3) -#else + #else (DICT_ADD_STRATEGY >= same0 + result) -#endif + #endif ) { // fprintf(stderr, "add v0 [%u]\"%s\"\n", l0, o->v0); @@ -2614,14 +2691,14 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } if ( -#if VARIANT < 3 + #if VARIANT < 3 !(!same0 && same1) && -#endif -#if VARIANT == 4 + #endif + #if VARIANT == 4 (same2 && !same3) -#else + #else (DICT_ADD_STRATEGY >= same1 + result) -#endif + #endif ) { // fprintf(stderr, "add v1 [%u]\"%s\"\n", l1, o->v1); @@ -2631,6 +2708,8 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } +#endif + rtn_fuzz_next_iter: afl->stage_cur++; @@ -2966,9 +3045,10 @@ exit_its: if (f) { fprintf(f, - "Cmplog: fname=%s len=%u ms=%llu result=%u finds=%llu entries=%u\n", + "Cmplog: fname=%s len=%u ms=%llu result=%u finds=%llu entries=%u " + "auto_extra_after=%u\n", afl->queue_cur->fname, len, get_cur_time() - start_time, r, - new_hit_cnt - orig_hit_cnt, cmp_locations); + new_hit_cnt - orig_hit_cnt, cmp_locations, afl->a_extras_cnt); #ifndef _DEBUG if (afl->not_on_tty) { fclose(f); } -- cgit 1.4.1 From 639d108512027bda6e7baf4a96f9d4e058e3c1d3 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sun, 31 Oct 2021 12:29:45 +0100 Subject: add cmplog compatability check --- include/types.h | 2 + instrumentation/afl-compiler-rt.o.c | 9 ++- src/afl-forkserver.c | 11 +++ src/afl-fuzz-redqueen.c | 144 +----------------------------------- 4 files changed, 22 insertions(+), 144 deletions(-) (limited to 'src') diff --git a/include/types.h b/include/types.h index 7b94fb83..063e6bbd 100644 --- a/include/types.h +++ b/include/types.h @@ -46,6 +46,7 @@ typedef uint128_t u128; #define FS_ERROR_SHM_OPEN 4 #define FS_ERROR_SHMAT 8 #define FS_ERROR_MMAP 16 +#define FS_ERROR_OLD_CMPLOG 32 /* Reporting options */ #define FS_OPT_ENABLED 0x80000001 @@ -53,6 +54,7 @@ typedef uint128_t u128; #define FS_OPT_SNAPSHOT 0x20000000 #define FS_OPT_AUTODICT 0x10000000 #define FS_OPT_SHDMEM_FUZZ 0x01000000 +#define FS_OPT_NEWCMPLOG 0x02000000 #define FS_OPT_OLD_AFLPP_WORKAROUND 0x0f000000 // FS_OPT_MAX_MAPSIZE is 8388608 = 0x800000 = 2^23 = 1 << 22 #define FS_OPT_MAX_MAPSIZE ((0x00fffffeU >> 1) + 1) diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 17724e74..6ee86add 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -679,7 +679,7 @@ static void __afl_start_snapshots(void) { /* Phone home and tell the parent that we're OK. If parent isn't there, assume we're not running in forkserver mode and just execute program. */ - status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT); + status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT | FS_OPT_NEWCMPLOG); if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ; if (__afl_map_size <= FS_OPT_MAX_MAPSIZE) status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE); @@ -945,7 +945,12 @@ static void __afl_start_forkserver(void) { } if (__afl_sharedmem_fuzzing != 0) { status_for_fsrv |= FS_OPT_SHDMEM_FUZZ; } - if (status_for_fsrv) { status_for_fsrv |= (FS_OPT_ENABLED); } + if (status_for_fsrv) { + + status_for_fsrv |= (FS_OPT_ENABLED | FS_OPT_NEWCMPLOG); + + } + memcpy(tmp, &status_for_fsrv, 4); /* Phone home and tell the parent that we're OK. If parent isn't there, diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index 54f510c4..da036d2f 100644 --- a/src/afl-forkserver.c +++ b/src/afl-forkserver.c @@ -342,6 +342,11 @@ static void report_error_and_exit(int error) { "the fuzzing target reports that the mmap() call to the shared " "memory failed."); break; + case FS_ERROR_OLD_CMPLOG: + FATAL( + "the -c cmplog target was instrumented with an too old afl++ " + "version, you need to recompile it."); + break; default: FATAL("unknown error code %d from fuzzing target!", error); @@ -651,6 +656,12 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv, if ((status & FS_OPT_OLD_AFLPP_WORKAROUND) == FS_OPT_OLD_AFLPP_WORKAROUND) status = (status & 0xf0ffffff); + if ((status & FS_OPT_NEWCMPLOG) == 0 && fsrv->cmplog_binary) { + + report_error_and_exit(FS_ERROR_OLD_CMPLOG); + + } + if ((status & FS_OPT_SNAPSHOT) == FS_OPT_SNAPSHOT) { fsrv->snapshot = 1; diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index b5dc23c2..c97080c7 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -28,8 +28,6 @@ #include "afl-fuzz.h" #include "cmplog.h" -#define VARIANT 6 - //#define _DEBUG //#define CMPLOG_INTROSPECTION @@ -1855,10 +1853,9 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, // we only learn 16 bit + if (hshape > 1) { -#if VARIANT == 6 if (!found_one || afl->queue_cur->is_ascii) { - #ifdef WORD_SIZE_64 +#ifdef WORD_SIZE_64 if (unlikely(is_n)) { if (!found_one || @@ -1872,7 +1869,7 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } else - #endif +#endif { if (!memcmp((u8 *)&o->v0, (u8 *)&orig_o->v0, SHAPE_BYTES(h->shape)) && @@ -1890,75 +1887,6 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } -#else - - u8 same0 = 0, same1 = 0, same2 = 0, same3 = 0, - result = 1 + (found_one << 2); - if (o->v0 != orig_o->v0) { same0 = 8; } - if (o->v1 != orig_o->v1) { same1 = 8; } - if (o->v0 != o->v1) { same2 = 8; } - if (orig_o->v0 != orig_o->v1) { same3 = 8; } - if (check_if_text_buf((u8 *)&o->v0, hshape) < hshape) same0 += 2; - if (check_if_text_buf((u8 *)&o->v1, hshape) < hshape) same1 += 2; - - if (!same2 && !same3) { - - #ifdef WORD_SIZE_64 - if (unlikely(is_n)) { - - if ( - #if VARIANT == 1 - !(!same0 && same1) && - #endif - DICT_ADD_STRATEGY >= same0 + result) { - - try_to_add_to_dictN(afl, s128_v0, hshape); - - } - - if ( - #if VARIANT == 1 - !(same0 && !same1) && - #endif - DICT_ADD_STRATEGY >= same1 + result) { - - try_to_add_to_dictN(afl, s128_v1, hshape); - - } - - } else - - #endif - { - - if ( - #if VARIANT == 1 - !(!same0 && same1) && - #endif - DICT_ADD_STRATEGY >= same0 + result) { - - // fprintf(stderr, "add v0 0x%llx\n", o->v0); - try_to_add_to_dict(afl, o->v0, hshape); - - } - - if ( - #if VARIANT == 1 - !(same0 && !same1) && - #endif - DICT_ADD_STRATEGY >= same1 + result) { - - // fprintf(stderr, "add v1 0x%llx\n", o->v1); - try_to_add_to_dict(afl, o->v1, hshape); - - } - - } - - } - -#endif - } cmp_fuzz_next_iter: @@ -2606,7 +2534,6 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, // if (unlikely(!afl->pass_stats[key].total)) { -#if VARIANT == 6 if ((!found_one && (lvl & LVL1)) || afl->queue_cur->is_ascii) { // if (unlikely(!afl->pass_stats[key].total)) { @@ -2643,73 +2570,6 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf, } -#else - if (lvl & LVL1) { - - u8 is_txt = 0, l0 = o->v0_len, ol0 = orig_o->v0_len, l1 = o->v1_len, - ol1 = orig_o->v1_len; - if (l0 >= 0x80 || ol0 >= 0x80) { - - is_txt = 1; - l0 -= 0x80; - l1 -= 0x80; - ol0 -= 0x80; - ol1 -= 0x80; - - } - - if (l0 == 0 || l1 == 0 || ol0 == 0 || ol1 == 0 || l0 > 31 || l1 > 31 || - ol0 > 31 || ol1 > 31) { - - l0 = l1 = ol0 = ol1 = hshape; - - } - - u8 same0 = 0, same1 = 0, same2 = 0, same3 = 0, - result = 1 + (found_one << 2); - if (l0 != ol0 || memcmp(o->v0, orig_o->v0, l0) != 0) { same0 = 8; } - if (l1 != ol1 || memcmp(o->v1, orig_o->v1, l1) != 0) { same1 = 8; } - if (l0 != l1 || memcmp(o->v0, o->v1, l0) != 0) { same2 = 8; } - if (ol0 != ol1 || memcmp(orig_o->v0, orig_o->v1, l0) != 0) { same3 = 8; } - if (!is_txt && check_if_text_buf((u8 *)&o->v0, l0) < l0) { same0 += 2; } - if (!is_txt && check_if_text_buf((u8 *)&o->v1, l1) < l1) { same1 += 2; } - - if ( - #if VARIANT < 3 - !(same0 && !same1) && - #endif - #if VARIANT == 4 - (!same2 && same3) - #else - (DICT_ADD_STRATEGY >= same0 + result) - #endif - ) { - - // fprintf(stderr, "add v0 [%u]\"%s\"\n", l0, o->v0); - maybe_add_auto(afl, o->v0, l0); - - } - - if ( - #if VARIANT < 3 - !(!same0 && same1) && - #endif - #if VARIANT == 4 - (same2 && !same3) - #else - (DICT_ADD_STRATEGY >= same1 + result) - #endif - ) { - - // fprintf(stderr, "add v1 [%u]\"%s\"\n", l1, o->v1); - maybe_add_auto(afl, o->v1, l1); - - } - - } - -#endif - rtn_fuzz_next_iter: afl->stage_cur++; -- cgit 1.4.1 From 701997a2a5226c731c15f8d3f512cd8c91df1612 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 3 Nov 2021 13:33:59 +0100 Subject: fix --- src/afl-fuzz-redqueen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index c97080c7..a9c5625f 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -1383,7 +1383,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, } - hshape = 7; // recover shape + hshape = 8; // recover shape } -- cgit 1.4.1 From afc84438c60f7a70464ddc353216e0d9fb0c0b49 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Wed, 3 Nov 2021 21:23:03 +0100 Subject: move bitmap and saveauto --- src/afl-fuzz.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 6ec033b2..e06060ab 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -2226,13 +2226,12 @@ int main(int argc, char **argv_orig, char **envp) { } - write_bitmap(afl); - save_auto(afl); - stop_fuzzing: afl->force_ui_update = 1; // ensure the screen is reprinted show_stats(afl); // print the screen one last time + write_bitmap(afl); + save_auto(afl); SAYF(CURSOR_SHOW cLRD "\n\n+++ Testing aborted %s +++\n" cRST, afl->stop_soon == 2 ? "programmatically" : "by user"); -- cgit 1.4.1 From 61039b410c98cb971daff5778b1591d0e5cbfd3e Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 6 Nov 2021 14:29:16 +0100 Subject: better error report for old qemu/frida cmplog --- include/types.h | 1 + src/afl-forkserver.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/include/types.h b/include/types.h index 063e6bbd..bca4ff92 100644 --- a/include/types.h +++ b/include/types.h @@ -47,6 +47,7 @@ typedef uint128_t u128; #define FS_ERROR_SHMAT 8 #define FS_ERROR_MMAP 16 #define FS_ERROR_OLD_CMPLOG 32 +#define FS_ERROR_OLD_CMPLOG_QEMU 64 /* Reporting options */ #define FS_OPT_ENABLED 0x80000001 diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index 93fd19e1..94c90701 100644 --- a/src/afl-forkserver.c +++ b/src/afl-forkserver.c @@ -347,6 +347,11 @@ static void report_error_and_exit(int error) { "the -c cmplog target was instrumented with an too old afl++ " "version, you need to recompile it."); break; + case FS_ERROR_OLD_CMPLOG_QEMU: + FATAL( + "The AFL++ QEMU/FRIDA loaders are from an older version, for -c you " + "need to recompile it.\n"); + break; default: FATAL("unknown error code %d from fuzzing target!", error); @@ -670,7 +675,15 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv, if ((status & FS_OPT_NEWCMPLOG) == 0 && fsrv->cmplog_binary) { - report_error_and_exit(FS_ERROR_OLD_CMPLOG); + if (fsrv->qemu_mode || fsrv->frida_mode) { + + report_error_and_exit(FS_ERROR_OLD_CMPLOG_QEMU); + + } else { + + report_error_and_exit(FS_ERROR_OLD_CMPLOG); + + } } -- cgit 1.4.1 From 64704403ca957886b3ad0b55efebaf29906712b8 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 6 Nov 2021 14:54:53 +0100 Subject: better screen update --- src/afl-fuzz-redqueen.c | 70 +++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c index a9c5625f..4fa62059 100644 --- a/src/afl-fuzz-redqueen.c +++ b/src/afl-fuzz-redqueen.c @@ -84,6 +84,8 @@ struct range { }; static u32 hshape; +static u64 screen_update; +static u64 last_update; static struct range *add_range(struct range *ranges, u32 start, u32 end) { @@ -273,7 +275,6 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len, u64 start_time = get_cur_time(); #endif - u32 screen_update; u64 orig_hit_cnt, new_hit_cnt, exec_cksum; orig_hit_cnt = afl->queued_paths + afl->unique_crashes; @@ -282,24 +283,6 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len, afl->stage_max = (len << 1); afl->stage_cur = 0; - if (likely(afl->queue_cur->exec_us)) { - - if (likely((100000 / 2) >= afl->queue_cur->exec_us)) { - - screen_update = 100000 / afl->queue_cur->exec_us; - - } else { - - screen_update = 1; - - } - - } else { - - screen_update = 100000; - - } - // in colorization we do not classify counts, hence we have to calculate // the original checksum. if (unlikely(get_exec_checksum(afl, buf, len, &exec_cksum))) { @@ -369,7 +352,7 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len, } - if (++afl->stage_cur % screen_update) { show_stats(afl); }; + if (++afl->stage_cur % screen_update == 0) { show_stats(afl); }; } @@ -780,6 +763,13 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h, u32 its_len = MIN(len - idx, taint_len); + if (afl->fsrv.total_execs - last_update > screen_update) { + + show_stats(afl); + last_update = afl->fsrv.total_execs; + + } + // fprintf(stderr, // "Encode: %llx->%llx into %llx(<-%llx) at idx=%u " // "taint_len=%u shape=%u attr=%u\n", @@ -1438,6 +1428,13 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h, u32 taint_len, u8 *orig_buf, u8 *buf, u8 *cbuf, u32 len, u8 do_reverse, u8 lvl, u8 *status) { + if (afl->fsrv.total_execs - last_update > screen_update) { + + show_stats(afl); + last_update = afl->fsrv.total_execs; + + } + u8 *ptr = (u8 *)&buf[idx]; u8 *o_ptr = (u8 *)&orig_buf[idx]; u8 *p = (u8 *)&pattern; @@ -1926,6 +1923,13 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 entry, // (void)(changed_val); //#endif + if (afl->fsrv.total_execs - last_update > screen_update) { + + show_stats(afl); + last_update = afl->fsrv.total_execs; + + } + u8 *pattern, *repl, *o_pattern, *changed_val; u8 l0, l1, ol0, ol1; @@ -2600,6 +2604,23 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) { } struct tainted *taint = NULL; + if (likely(afl->queue_cur->exec_us)) { + + if (likely((100000 / 2) >= afl->queue_cur->exec_us)) { + + screen_update = 100000 / afl->queue_cur->exec_us; + + } else { + + screen_update = 1; + + } + + } else { + + screen_update = 100000; + + } if (!afl->queue_cur->taint || !afl->queue_cur->cmplog_colorinput) { @@ -2700,8 +2721,6 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) { u64 orig_hit_cnt, new_hit_cnt; u64 orig_execs = afl->fsrv.total_execs; orig_hit_cnt = afl->queued_paths + afl->unique_crashes; - u64 screen_update = 100000 / afl->queue_cur->exec_us, - execs = afl->fsrv.total_execs; afl->stage_name = "input-to-state"; afl->stage_short = "its"; @@ -2783,13 +2802,6 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) { } - if (afl->fsrv.total_execs - execs > screen_update) { - - execs = afl->fsrv.total_execs; - show_stats(afl); - - } - } r = 0; -- cgit 1.4.1 From 2ddbaa439ca78b0ae8cc6691d9657f5783b2d5e8 Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Sat, 6 Nov 2021 16:22:38 +0100 Subject: 30% faster hashing speed --- include/xxhash.h | 3112 +++++++++++++++++++++-------------- instrumentation/afl-compiler-rt.o.c | 2 +- src/afl-performance.c | 6 +- 3 files changed, 1894 insertions(+), 1226 deletions(-) (limited to 'src') diff --git a/include/xxhash.h b/include/xxhash.h index 006d3f3d..0ca2b852 100644 --- a/include/xxhash.h +++ b/include/xxhash.h @@ -32,7 +32,12 @@ * - xxHash homepage: https://www.xxhash.com * - xxHash source repository: https://github.com/Cyan4973/xxHash */ - +/*! + * @mainpage xxHash + * + * @file xxhash.h + * xxHash prototypes and implementation + */ /* TODO: update */ /* Notice extracted from xxHash homepage: @@ -45,7 +50,7 @@ Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo Name Speed Q.Score Author xxHash 5.4 GB/s 10 CrapWow 3.2 GB/s 2 Andrew -MumurHash 3a 2.7 GB/s 10 Austin Appleby +MurmurHash 3a 2.7 GB/s 10 Austin Appleby SpookyHash 2.0 GB/s 10 Bob Jenkins SBox 1.4 GB/s 9 Bret Mulvey Lookup3 1.2 GB/s 9 Bob Jenkins @@ -119,29 +124,78 @@ extern "C" { /* * This part deals with the special case where a unit wants to inline xxHash, - * but "xxhash.h" has previously been included without XXH_INLINE_ALL, such - * as part of some previously included *.h header file. + * but "xxhash.h" has previously been included without XXH_INLINE_ALL, + * such as part of some previously included *.h header file. * Without further action, the new include would just be ignored, * and functions would effectively _not_ be inlined (silent failure). * The following macros solve this situation by prefixing all inlined names, * avoiding naming collision with previous inclusions. */ - #ifdef XXH_NAMESPACE - #error "XXH_INLINE_ALL with XXH_NAMESPACE is not supported" - /* - * Note: Alternative: #undef all symbols (it's a pretty large list). - * Without #error: it compiles, but functions are actually not inlined. - */ - #endif +/* Before that, we unconditionally #undef all symbols, + * in case they were already defined with XXH_NAMESPACE. + * They will then be redefined for XXH_INLINE_ALL + */ + #undef XXH_versionNumber +/* XXH32 */ + #undef XXH32 + #undef XXH32_createState + #undef XXH32_freeState + #undef XXH32_reset + #undef XXH32_update + #undef XXH32_digest + #undef XXH32_copyState + #undef XXH32_canonicalFromHash + #undef XXH32_hashFromCanonical +/* XXH64 */ + #undef XXH64 + #undef XXH64_createState + #undef XXH64_freeState + #undef XXH64_reset + #undef XXH64_update + #undef XXH64_digest + #undef XXH64_copyState + #undef XXH64_canonicalFromHash + #undef XXH64_hashFromCanonical +/* XXH3_64bits */ + #undef XXH3_64bits + #undef XXH3_64bits_withSecret + #undef XXH3_64bits_withSeed + #undef XXH3_createState + #undef XXH3_freeState + #undef XXH3_copyState + #undef XXH3_64bits_reset + #undef XXH3_64bits_reset_withSeed + #undef XXH3_64bits_reset_withSecret + #undef XXH3_64bits_update + #undef XXH3_64bits_digest + #undef XXH3_generateSecret +/* XXH3_128bits */ + #undef XXH128 + #undef XXH3_128bits + #undef XXH3_128bits_withSeed + #undef XXH3_128bits_withSecret + #undef XXH3_128bits_reset + #undef XXH3_128bits_reset_withSeed + #undef XXH3_128bits_reset_withSecret + #undef XXH3_128bits_update + #undef XXH3_128bits_digest + #undef XXH128_isEqual + #undef XXH128_cmp + #undef XXH128_canonicalFromHash + #undef XXH128_hashFromCanonical +/* Finally, free the namespace itself */ + #undef XXH_NAMESPACE + +/* employ the namespace for XXH_INLINE_ALL */ #define XXH_NAMESPACE XXH_INLINE_ /* - * Some identifiers (enums, type names) are not symbols, but they must - * still be renamed to avoid redeclaration. + * Some identifiers (enums, type names) are not symbols, + * but they must nonetheless be renamed to avoid redeclaration. * Alternative solution: do not redeclare them. - * However, this requires some #ifdefs, and is a more dispersed action. - * Meanwhile, renaming can be achieved in a single block + * However, this requires some #ifdefs, and has a more dispersed impact. + * Meanwhile, renaming can be achieved in a single place. */ - #define XXH_IPREF(Id) XXH_INLINE_##Id + #define XXH_IPREF(Id) XXH_NAMESPACE##Id #define XXH_OK XXH_IPREF(XXH_OK) #define XXH_ERROR XXH_IPREF(XXH_ERROR) #define XXH_errorcode XXH_IPREF(XXH_errorcode) @@ -166,6 +220,12 @@ extern "C" { #ifndef XXHASH_H_5627135585666179 #define XXHASH_H_5627135585666179 1 + /*! + * @defgroup public Public API + * Contains details on the public xxHash functions. + * @{ + + */ /* specific declaration modes for Windows */ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) #if defined(WIN32) && defined(_MSC_VER) && \ @@ -180,19 +240,24 @@ extern "C" { #endif #endif - /*! - * XXH_NAMESPACE, aka Namespace Emulation: - * - * If you want to include _and expose_ xxHash functions from within your own - * library, but also want to avoid symbol collisions with other libraries - * which may also include xxHash, you can use XXH_NAMESPACE to automatically - * prefix any public symbol from xxhash library with the value of - * XXH_NAMESPACE (therefore, avoid empty or numeric values). - * - * Note that no change is required within the calling program as long as it - * includes `xxhash.h`: Regular symbol names will be automatically translated - * by this header. - */ + #ifdef XXH_DOXYGEN + /*! + * @brief Emulate a namespace by transparently prefixing all symbols. + * + * If you want to include _and expose_ xxHash functions from within your own + * library, but also want to avoid symbol collisions with other libraries + * which may also include xxHash, you can use XXH_NAMESPACE to automatically + * prefix any public symbol from xxhash library with the value of + * XXH_NAMESPACE (therefore, avoid empty or numeric values). + * + * Note that no change is required within the calling program as long as it + * includes `xxhash.h`: Regular symbol names will be automatically + * translated by this header. + */ + #define XXH_NAMESPACE /* YOUR NAME HERE */ + #undef XXH_NAMESPACE + #endif + #ifdef XXH_NAMESPACE #define XXH_CAT(A, B) A##B #define XXH_NAME2(A, B) XXH_CAT(A, B) @@ -264,10 +329,19 @@ extern "C" { ***************************************/ #define XXH_VERSION_MAJOR 0 #define XXH_VERSION_MINOR 8 - #define XXH_VERSION_RELEASE 0 + #define XXH_VERSION_RELEASE 1 #define XXH_VERSION_NUMBER \ (XXH_VERSION_MAJOR * 100 * 100 + XXH_VERSION_MINOR * 100 + \ XXH_VERSION_RELEASE) + +/*! + * @brief Obtains the xxHash version. + * + * This is only useful when xxHash is compiled as a shared library, as it is + * independent of the version defined in the header. + * + * @return `XXH_VERSION_NUMBER` as of when the libray was compiled. + */ XXH_PUBLIC_API unsigned XXH_versionNumber(void); /* **************************** @@ -279,15 +353,24 @@ typedef enum { XXH_OK = 0, XXH_ERROR } XXH_errorcode; /*-********************************************************************** * 32-bit hash ************************************************************************/ - #if !defined(__VMS) && \ + #if defined(XXH_DOXYGEN) /* Don't show include */ +/*! + * @brief An unsigned 32-bit integer. + * + * Not necessarily defined to `uint32_t` but functionally equivalent. + */ +typedef uint32_t XXH32_hash_t; + + #elif !defined(__VMS) && \ (defined(__cplusplus) || \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)) #include -typedef uint32_t XXH32_hash_t; +typedef uint32_t XXH32_hash_t; + #else #include #if UINT_MAX == 0xFFFFFFFFUL -typedef unsigned int XXH32_hash_t; +typedef unsigned int XXH32_hash_t; #else #if ULONG_MAX == 0xFFFFFFFFUL typedef unsigned long XXH32_hash_t; @@ -298,24 +381,52 @@ typedef unsigned long XXH32_hash_t; #endif /*! - * XXH32(): - * Calculate the 32-bit hash of sequence "length" bytes stored at memory - * address "input". The memory between input & input+length must be valid - * (allocated and read-accessible). "seed" can be used to alter the result - * predictably. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher - * benchmark): 5.4 GB/s - * - * Note: XXH3 provides competitive speed for both 32-bit and 64-bit systems, - * and offers true 64/128 bit hash results. It provides a superior level of - * dispersion, and greatly reduces the risks of collisions. + * @} + * + * @defgroup xxh32_family XXH32 family + * @ingroup public + * Contains functions used in the classic 32-bit xxHash algorithm. + * + * @note + * XXH32 is considered rather weak by today's standards. + * The @ref xxh3_family provides competitive speed for both 32-bit and 64-bit + * systems, and offers true 64/128 bit hash results. It provides a superior + * level of dispersion, and greatly reduces the risks of collisions. + * + * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families + * @see @ref xxh32_impl for implementation details + * @{ + + */ + +/*! + * @brief Calculates the 32-bit hash of @p input using xxHash32. + * + * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s + * + * @param input The block of data to be hashed, at least @p length bytes in + * size. + * @param length The length of @p input, in bytes. + * @param seed The 32-bit seed to alter the hash's output predictably. + * + * @pre + * The memory between @p input and @p input + @p length must be valid, + * readable, contiguous memory. However, if @p length is `0`, @p input may be + * `NULL`. In C++, this also must be *TriviallyCopyable*. + * + * @return The calculated 32-bit hash value. + * + * @see + * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128(): + * Direct equivalents for the other variants of xxHash. + * @see + * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version. */ XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t length, XXH32_hash_t seed); -/******* Streaming *******/ - -/* - * Streaming functions generate the xxHash value from an incrememtal input. +/*! + * Streaming functions generate the xxHash value from an incremental input. * This method is slower than single-call functions, due to state management. * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. * @@ -336,19 +447,125 @@ XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t length, * digest, and generate new hash values later on by invoking `XXH*_digest()`. * * When done, release the state using `XXH*_freeState()`. + * + * Example code for incrementally hashing a file: + * @code{.c} + * #include + * #include + * #define BUFFER_SIZE 256 + * + * // Note: XXH64 and XXH3 use the same interface. + * XXH32_hash_t + * hashFile(FILE* stream) + * { + + * XXH32_state_t* state; + * unsigned char buf[BUFFER_SIZE]; + * size_t amt; + * XXH32_hash_t hash; + * + * state = XXH32_createState(); // Create a state + * assert(state != NULL); // Error check here + * XXH32_reset(state, 0xbaad5eed); // Reset state with our seed + * while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) { + + * XXH32_update(state, buf, amt); // Hash the file in chunks + * } + * hash = XXH32_digest(state); // Finalize the hash + * XXH32_freeState(state); // Clean up + * return hash; + * } + * @endcode + */ + +/*! + * @typedef struct XXH32_state_s XXH32_state_t + * @brief The opaque state struct for the XXH32 streaming API. + * + * @see XXH32_state_s for details. */ +typedef struct XXH32_state_s XXH32_state_t; -typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +/*! + * @brief Allocates an @ref XXH32_state_t. + * + * Must be freed with XXH32_freeState(). + * @return An allocated XXH32_state_t on success, `NULL` on failure. + */ XXH_PUBLIC_API XXH32_state_t *XXH32_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr); -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t * dst_state, - const XXH32_state_t *src_state); +/*! + * @brief Frees an @ref XXH32_state_t. + * + * Must be allocated with XXH32_createState(). + * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref + * XXH32_createState(). + * @return XXH_OK. + */ +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr); +/*! + * @brief Copies one @ref XXH32_state_t to another. + * + * @param dst_state The state to copy to. + * @param src_state The state to copy from. + * @pre + * @p dst_state and @p src_state must not be `NULL` and must not overlap. + */ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t * dst_state, + const XXH32_state_t *src_state); +/*! + * @brief Resets an @ref XXH32_state_t to begin a new hash. + * + * This function resets and seeds a state. Call it before @ref XXH32_update(). + * + * @param statePtr The state struct to reset. + * @param seed The 32-bit seed to alter the hash result predictably. + * + * @pre + * @p statePtr must not be `NULL`. + * + * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. + */ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t *statePtr, XXH32_hash_t seed); + +/*! + * @brief Consumes a block of @p input to an @ref XXH32_state_t. + * + * Call this to incrementally consume blocks of data. + * + * @param statePtr The state struct to update. + * @param input The block of data to be hashed, at least @p length bytes in + * size. + * @param length The length of @p input, in bytes. + * + * @pre + * @p statePtr must not be `NULL`. + * @pre + * The memory between @p input and @p input + @p length must be valid, + * readable, contiguous memory. However, if @p length is `0`, @p input may be + * `NULL`. In C++, this also must be *TriviallyCopyable*. + * + * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. + */ XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t *statePtr, const void *input, size_t length); -XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *statePtr); + +/*! + * @brief Returns the calculated hash value from an @ref XXH32_state_t. + * + * @note + * Calling XXH32_digest() will not affect @p statePtr, so you can update, + * digest, and update again. + * + * @param statePtr The state struct to calculate the hash from. + * + * @pre + * @p statePtr must not be `NULL`. + * + * @return The calculated xxHash32 value from that state. + */ +XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *statePtr); /******* Canonical representation *******/ @@ -373,48 +590,158 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *statePtr); * canonical format. */ +/*! + * @brief Canonical (big endian) representation of @ref XXH32_hash_t. + */ typedef struct { - unsigned char digest[4]; + unsigned char digest[4]; /*!< Hash bytes, big endian */ } XXH32_canonical_t; +/*! + * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t. + * + * @param dst The @ref XXH32_canonical_t pointer to be stored to. + * @param hash The @ref XXH32_hash_t to be converted. + * + * @pre + * @p dst must not be `NULL`. + */ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t *dst, XXH32_hash_t hash); + +/*! + * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t. + * + * @param src The @ref XXH32_canonical_t to convert. + * + * @pre + * @p src must not be `NULL`. + * + * @return The converted hash. + */ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t *src); + #ifdef __has_attribute + #define XXH_HAS_ATTRIBUTE(x) __has_attribute(x) + #else + #define XXH_HAS_ATTRIBUTE(x) 0 + #endif + + /* C-language Attributes are added in C23. */ + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && \ + defined(__has_c_attribute) + #define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x) + #else + #define XXH_HAS_C_ATTRIBUTE(x) 0 + #endif + + #if defined(__cplusplus) && defined(__has_cpp_attribute) + #define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) + #else + #define XXH_HAS_CPP_ATTRIBUTE(x) 0 + #endif + + /* + Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' + attribute introduced in CPP17 and C23. CPP17 : + https://en.cppreference.com/w/cpp/language/attributes/fallthrough C23 : + https://en.cppreference.com/w/c/language/attributes/fallthrough + */ + #if XXH_HAS_C_ATTRIBUTE(x) + #define XXH_FALLTHROUGH [[fallthrough]] + #elif XXH_HAS_CPP_ATTRIBUTE(x) + #define XXH_FALLTHROUGH [[fallthrough]] + #elif XXH_HAS_ATTRIBUTE(__fallthrough__) + #define XXH_FALLTHROUGH __attribute__((fallthrough)) + #else + #define XXH_FALLTHROUGH + #endif + +/*! + * @} + * @ingroup public + * @{ + + */ + #ifndef XXH_NO_LONG_LONG /*-********************************************************************** * 64-bit hash ************************************************************************/ - #if !defined(__VMS) && \ + #if defined(XXH_DOXYGEN) /* don't include */ +/*! + * @brief An unsigned 64-bit integer. + * + * Not necessarily defined to `uint64_t` but functionally equivalent. + */ +typedef uint64_t XXH64_hash_t; + #elif !defined(__VMS) && \ (defined(__cplusplus) || (defined(__STDC_VERSION__) && \ (__STDC_VERSION__ >= 199901L) /* C99 */)) #include typedef uint64_t XXH64_hash_t; #else + #include + #if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL +/* LP64 ABI says uint64_t is unsigned long */ +typedef unsigned long XXH64_hash_t; + #else /* the following type must have a width of 64-bit */ typedef unsigned long long XXH64_hash_t; + #endif #endif /*! - * XXH64(): - * Returns the 64-bit hash of sequence of length @length stored at memory - * address @input. - * @seed can be used to alter the result predictably. + * @} + * + * @defgroup xxh64_family XXH64 family + * @ingroup public + * @{ + + * Contains functions used in the classic 64-bit xxHash algorithm. + * + * @note + * XXH3 provides competitive speed for both 32-bit and 64-bit systems, + * and offers true 64/128 bit hash results. It provides a superior level of + * dispersion, and greatly reduces the risks of collisions. + */ + +/*! + * @brief Calculates the 64-bit hash of @p input using xxHash64. * * This function usually runs faster on 64-bit systems, but slower on 32-bit * systems (see benchmark). * - * Note: XXH3 provides competitive speed for both 32-bit and 64-bit systems, - * and offers true 64/128 bit hash results. It provides a superior level of - * dispersion, and greatly reduces the risks of collisions. + * @param input The block of data to be hashed, at least @p length bytes in + * size. + * @param length The length of @p input, in bytes. + * @param seed The 64-bit seed to alter the hash's output predictably. + * + * @pre + * The memory between @p input and @p input + @p length must be valid, + * readable, contiguous memory. However, if @p length is `0`, @p input may be + * `NULL`. In C++, this also must be *TriviallyCopyable*. + * + * @return The calculated 64-bit hash. + * + * @see + * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128(): + * Direct equivalents for the other variants of xxHash. + * @see + * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version. */ XXH_PUBLIC_API XXH64_hash_t XXH64(const void *input, size_t length, XXH64_hash_t seed); /******* Streaming *******/ +/*! + * @brief The opaque state struct for the XXH64 streaming API. + * + * @see XXH64_state_s for details. + */ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ XXH_PUBLIC_API XXH64_state_t *XXH64_createState(void); XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr); @@ -439,12 +766,15 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t *dst, XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t *src); -/*-********************************************************************** - * XXH3 64-bit variant - ************************************************************************/ +/*! + * @} + * ************************************************************************ + * @defgroup xxh3_family XXH3 family + * @ingroup public + * @{ -/* ************************************************************************ - * XXH3 is a new hash algorithm featuring: + * + * XXH3 is a more recent hash algorithm featuring: * - Improved speed for both small and large inputs * - True 64-bit and 128-bit outputs * - SIMD acceleration @@ -454,41 +784,38 @@ XXH64_hashFromCanonical(const XXH64_canonical_t *src); * * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html * - * In general, expect XXH3 to run about ~2x faster on large inputs and >3x - * faster on small ones compared to XXH64, though exact differences depend on - * the platform. - * - * The algorithm is portable: Like XXH32 and XXH64, it generates the same hash - * on all platforms. + * Compared to XXH64, expect XXH3 to run approximately + * ~2x faster on large inputs and >3x faster on small ones, + * exact differences vary depending on platform. * - * It benefits greatly from SIMD and 64-bit arithmetic, but does not require it. - * - * Almost all 32-bit and 64-bit targets that can run XXH32 smoothly can run - * XXH3 at competitive speeds, even if XXH64 runs slowly. Further details are - * explained in the implementation. + * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic, + * but does not require it. + * Any 32-bit and 64-bit targets that can run XXH32 smoothly + * can run XXH3 at competitive speeds, even without vector support. + * Further details are explained in the implementation. * * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8, - * ZVector and scalar targets. This can be controlled with the XXH_VECTOR macro. + * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro. + * + * XXH3 implementation is portable: + * it has a generic C90 formulation that can be compiled on any platform, + * all implementations generage exactly the same hash value on all platforms. + * Starting from v0.8.0, it's also labelled "stable", meaning that + * any future version will also generate the same hash value. * * XXH3 offers 2 variants, _64bits and _128bits. - * When only 64 bits are needed, prefer calling the _64bits variant, as it - * reduces the amount of mixing, resulting in faster speed on small inputs. * + * When only 64 bits are needed, prefer invoking the _64bits variant, as it + * reduces the amount of mixing, resulting in faster speed on small inputs. * It's also generally simpler to manipulate a scalar return type than a struct. * - * The 128-bit version adds additional strength, but it is slightly slower. - * - * Return values of XXH3 and XXH128 are officially finalized starting - * with v0.8.0 and will no longer change in future versions. - * Avoid storing values from before that release in long-term storage. - * - * Results produced by v0.7.x are not comparable with results from v0.7.y. - * However, the API is completely stable, and it can safely be used for - * ephemeral data (local sessions). - * * The API supports one-shot hashing, streaming mode, and custom secrets. */ +/*-********************************************************************** + * XXH3 64-bit variant + ************************************************************************/ + /* XXH3_64bits(): * default 64-bit variant, using default secret and default seed of 0. * It's the fastest variant. */ @@ -504,20 +831,28 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void *data, size_t len); XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void *data, size_t len, XXH64_hash_t seed); - /* - * XXH3_64bits_withSecret(): - * It's possible to provide any blob of bytes as a "secret" to generate the - * hash. This makes it more difficult for an external actor to prepare an - * intentional collision. The main condition is that secretSize *must* be - * large enough (>= XXH3_SECRET_SIZE_MIN). However, the quality of produced - * hash values depends on secret's entropy. Technically, the secret must - * look like a bunch of random bytes. Avoid "trivial" or structured data - * such as repeated sequences or a text document. Whenever unsure about the - * "randomness" of the blob of bytes, consider relabelling it as a "custom - * seed" instead, and employ "XXH3_generateSecret()" (see below) to generate - * a high entropy secret derived from the custom seed. + /*! + * The bare minimum size for a custom secret. + * + * @see + * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(), + * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret(). */ #define XXH3_SECRET_SIZE_MIN 136 + +/* + * XXH3_64bits_withSecret(): + * It's possible to provide any blob of bytes as a "secret" to generate the + * hash. This makes it more difficult for an external actor to prepare an + * intentional collision. The main condition is that secretSize *must* be large + * enough (>= XXH3_SECRET_SIZE_MIN). However, the quality of produced hash + * values depends on secret's entropy. Technically, the secret must look like a + * bunch of random bytes. Avoid "trivial" or structured data such as repeated + * sequences or a text document. Whenever unsure about the "randomness" of the + * blob of bytes, consider relabelling it as a "custom seed" instead, and employ + * "XXH3_generateSecret()" (see below) to generate a high entropy secret derived + * from the custom seed. + */ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void *data, size_t len, const void *secret, size_t secretSize); @@ -529,6 +864,12 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void *data, size_t len, * As a consequence, streaming is slower than one-shot hashing. * For better performance, prefer one-shot functions whenever applicable. */ + +/*! + * @brief The state struct for the XXH3 streaming API. + * + * @see XXH3_state_s for details. + */ typedef struct XXH3_state_s XXH3_state_t; XXH_PUBLIC_API XXH3_state_t *XXH3_createState(void); XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t *statePtr); @@ -572,10 +913,16 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(const XXH3_state_t *statePtr); * XXH3 128-bit variant ************************************************************************/ +/*! + * @brief The return value from 128-bit hashes. + * + * Stored in little endian order, although the fields themselves are in native + * endianness. + */ typedef struct { - XXH64_hash_t low64; - XXH64_hash_t high64; + XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */ + XXH64_hash_t high64; /*!< `value >> 64` */ } XXH128_hash_t; @@ -649,6 +996,9 @@ XXH128_hashFromCanonical(const XXH128_canonical_t *src); #endif /* XXH_NO_LONG_LONG */ +/*! + * @} + */ #endif /* XXHASH_H_5627135585666179 */ #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) @@ -660,7 +1010,7 @@ XXH128_hashFromCanonical(const XXH128_canonical_t *src); * These declarations should only be used with static linking. * Never use them in association with dynamic linking! ***************************************************************************** - */ +*/ /* * These definitions are only present to allow static allocation @@ -668,41 +1018,72 @@ XXH128_hashFromCanonical(const XXH128_canonical_t *src); * Never **ever** access their members directly. */ +/*! + * @internal + * @brief Structure for XXH32 streaming API. + * + * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, + * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is + * an opaque type. This allows fields to safely be changed. + * + * Typedef'd to @ref XXH32_state_t. + * Do not access the members of this struct directly. + * @see XXH64_state_s, XXH3_state_s + */ struct XXH32_state_s { - XXH32_hash_t total_len_32; - XXH32_hash_t large_len; - XXH32_hash_t v1; - XXH32_hash_t v2; - XXH32_hash_t v3; - XXH32_hash_t v4; - XXH32_hash_t mem32[4]; - XXH32_hash_t memsize; - XXH32_hash_t - reserved; /* never read nor write, might be removed in a future version */ + XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */ + XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref + total_len_32 overflow) */ + XXH32_hash_t v1; /*!< First accumulator lane */ + XXH32_hash_t v2; /*!< Second accumulator lane */ + XXH32_hash_t v3; /*!< Third accumulator lane */ + XXH32_hash_t v4; /*!< Fourth accumulator lane */ + XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as + unsigned char[16]. */ + XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */ + XXH32_hash_t reserved; /*!< Reserved field. Do not read or write to it, it may + be removed. */ }; /* typedef'd to XXH32_state_t */ #ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ +/*! + * @internal + * @brief Structure for XXH64 streaming API. + * + * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, + * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is + * an opaque type. This allows fields to safely be changed. + * + * Typedef'd to @ref XXH64_state_t. + * Do not access the members of this struct directly. + * @see XXH32_state_s, XXH3_state_s + */ struct XXH64_state_s { - XXH64_hash_t total_len; - XXH64_hash_t v1; - XXH64_hash_t v2; - XXH64_hash_t v3; - XXH64_hash_t v4; - XXH64_hash_t mem64[4]; - XXH32_hash_t memsize; - XXH32_hash_t reserved32; /* required for padding anyway */ - XXH64_hash_t reserved64; /* never read nor write, might be removed in a future - version */ + XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */ + XXH64_hash_t v1; /*!< First accumulator lane */ + XXH64_hash_t v2; /*!< Second accumulator lane */ + XXH64_hash_t v3; /*!< Third accumulator lane */ + XXH64_hash_t v4; /*!< Fourth accumulator lane */ + XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as + unsigned char[32]. */ + XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */ + XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/ + XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it, it + may be removed. */ }; /* typedef'd to XXH64_state_t */ - #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */ + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 \ + */ #include #define XXH_ALIGN(n) alignas(n) + #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */ + /* In C++ alignas() is a keyword */ + #define XXH_ALIGN(n) alignas(n) #elif defined(__GNUC__) #define XXH_ALIGN(n) __attribute__((aligned(n))) #elif defined(_MSC_VER) @@ -713,39 +1094,94 @@ struct XXH64_state_s { /* Old GCC versions only accept the attribute after the type in structures. */ - #if !(defined(__STDC_VERSION__) && \ - (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ + #if !(defined(__STDC_VERSION__) && \ + (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ + && !(defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \ && defined(__GNUC__) #define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) #else #define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type #endif + /*! + * @brief The size of the internal XXH3 buffer. + * + * This is the optimal update size for incremental hashing. + * + * @see XXH3_64b_update(), XXH3_128b_update(). + */ #define XXH3_INTERNALBUFFER_SIZE 256 + + /*! + * @brief Default size of the secret buffer (and @ref XXH3_kSecret). + * + * This is the size used in @ref XXH3_kSecret and the seeded functions. + * + * Not to be confused with @ref XXH3_SECRET_SIZE_MIN. + */ #define XXH3_SECRET_DEFAULT_SIZE 192 + +/*! + * @internal + * @brief Structure for XXH3 streaming API. + * + * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, + * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. + * Otherwise it is an opaque type. + * Never use this definition in combination with dynamic library. + * This allows fields to safely be changed in the future. + * + * @note ** This structure has a strict alignment requirement of 64 bytes!! ** + * Do not allocate this with `malloc()` or `new`, + * it will not be sufficiently aligned. + * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation. + * + * Typedef'd to @ref XXH3_state_t. + * Do never access the members of this struct directly. + * + * @see XXH3_INITSTATE() for stack initialization. + * @see XXH3_createState(), XXH3_freeState(). + * @see XXH32_state_s, XXH64_state_s + */ struct XXH3_state_s { XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]); - /* used to store a custom secret generated from a seed */ + /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref + * XXH64_state_s */ XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]); + /*!< Used to store a custom secret generated from a seed. */ XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]); - XXH32_hash_t bufferedSize; - XXH32_hash_t reserved32; - size_t nbStripesSoFar; - XXH64_hash_t totalLen; - size_t nbStripesPerBlock; - size_t secretLimit; - XXH64_hash_t seed; - XXH64_hash_t reserved64; - const unsigned char *extSecret; /* reference to external secret; - * if == NULL, use .customSecret instead */ + /*!< The internal buffer. @see XXH32_state_s::mem32 */ + XXH32_hash_t bufferedSize; + /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */ + XXH32_hash_t reserved32; + /*!< Reserved field. Needed for padding on 64-bit. */ + size_t nbStripesSoFar; + /*!< Number or stripes processed. */ + XXH64_hash_t totalLen; + /*!< Total length hashed. 64-bit even on 32-bit targets. */ + size_t nbStripesPerBlock; + /*!< Number of stripes per block. */ + size_t secretLimit; + /*!< Size of @ref customSecret or @ref extSecret */ + XXH64_hash_t seed; + /*!< Seed for _withSeed variants. Must be zero otherwise, @see + * XXH3_INITSTATE() */ + XXH64_hash_t reserved64; + /*!< Reserved field. */ + const unsigned char *extSecret; + /*!< Reference to an external secret for the _withSecret variants, NULL + * for other variants. */ /* note: there may be some padding at the end due to alignment on 64 bytes */ }; /* typedef'd to XXH3_state_t */ #undef XXH_ALIGN_MEMBER - /* When the XXH3_state_t structure is merely emplaced on stack, + /*! + * @brief Initializes a stack-allocated `XXH3_state_s`. + * + * When the @ref XXH3_state_t structure is merely emplaced on stack, * it should be initialized with XXH3_INITSTATE() or a memset() * in case its first reset uses XXH3_NNbits_reset_withSeed(). * This init can be omitted if the first reset uses default or _withSecret @@ -802,7 +1238,6 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *data, size_t len, XXH64_hash_t seed); #endif /* XXH_NO_LONG_LONG */ - #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) #define XXH_IMPLEMENTATION #endif @@ -844,81 +1279,183 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *data, size_t len, /* ************************************* * Tuning parameters ***************************************/ + /*! - * XXH_FORCE_MEMORY_ACCESS: - * By default, access to unaligned memory is controlled by `memcpy()`, which - * is safe and portable. - * - * Unfortunately, on some target/compiler combinations, the generated assembly - * is sub-optimal. + * @defgroup tuning Tuning parameters + * @{ + * - * The below switch allow selection of a different access method - * in the search for improved performance. - * Method 0 (default): - * Use `memcpy()`. Safe and portable. Default. - * Method 1: - * `__attribute__((packed))` statement. It depends on compiler extensions - * and is therefore not portable. - * This method is safe if your compiler supports it, and *generally* as - * fast or faster than `memcpy`. - * Method 2: - * Direct access via cast. This method doesn't depend on the compiler but - * violates the C standard. - * It can generate buggy code on targets which do not support unaligned - * memory accesses. - * But in some circumstances, it's the only known way to get the most - * performance (example: GCC + ARMv6) - * Method 3: - * Byteshift. This can generate the best code on old compilers which don't - * inline small `memcpy()` calls, and it might also be faster on - * big-endian systems which lack a native byteswap instruction. See - * https://stackoverflow.com/a/32095106/646947 for details. Prefer these - * methods in priority order (0 > 1 > 2 > 3) + * Various macros to control xxHash's behavior. */ + #ifdef XXH_DOXYGEN + /*! + * @brief Define this to disable 64-bit code. + * + * Useful if only using the @ref xxh32_family and you have a strict C90 + * compiler. + */ + #define XXH_NO_LONG_LONG + #undef XXH_NO_LONG_LONG /* don't actually */ + /*! + * @brief Controls how unaligned memory is accessed. + * + * By default, access to unaligned memory is controlled by `memcpy()`, which + * is safe and portable. + * + * Unfortunately, on some target/compiler combinations, the generated + * assembly is sub-optimal. + * + * The below switch allow selection of a different access method + * in the search for improved performance. + * + * @par Possible options: + * + * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy` + * @par + * Use `memcpy()`. Safe and portable. Note that most modern compilers + * will eliminate the function call and treat it as an unaligned access. + * + * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))` + * @par + * Depends on compiler extensions and is therefore not portable. + * This method is safe _if_ your compiler supports it, + * and *generally* as fast or faster than `memcpy`. + * + * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast + * @par + * Casts directly and dereferences. This method doesn't depend on the + * compiler, but it violates the C standard as it directly dereferences + * an unaligned pointer. It can generate buggy code on targets which do not + * support unaligned memory accesses, but in some circumstances, it's + * the only known way to get the most performance. + * + * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift + * @par + * Also portable. This can generate the best code on old compilers which + * don't inline small `memcpy()` calls, and it might also be faster on + * big-endian systems which lack a native byteswap instruction. However, + * some compilers will emit literal byteshifts even if the target supports + * unaligned access. + * . + * + * @warning + * Methods 1 and 2 rely on implementation-defined behavior. Use these with + * care, as what works on one compiler/platform/optimization level may + * cause another to read garbage data or even crash. + * + * See https://stackoverflow.com/a/32095106/646947 for details. + * + * Prefer these methods in priority order (0 > 3 > 1 > 2) + */ + #define XXH_FORCE_MEMORY_ACCESS 0 + /*! + * @def XXH_ACCEPT_NULL_INPUT_POINTER + * @brief Whether to add explicit `NULL` checks. + * + * If the input pointer is `NULL` and the length is non-zero, xxHash's + * default behavior is to dereference it, triggering a segfault. + * + * When this macro is enabled, xxHash actively checks the input for a null + * pointer. If it is, the result for null input pointers is the same as a + * zero-length input. + */ + #define XXH_ACCEPT_NULL_INPUT_POINTER 0 + /*! + * @def XXH_FORCE_ALIGN_CHECK + * @brief If defined to non-zero, adds a special path for aligned inputs + * (XXH32() and XXH64() only). + * + * This is an important performance trick for architectures without decent + * unaligned memory access performance. + * + * It checks for input alignment, and when conditions are met, uses a "fast + * path" employing direct 32-bit/64-bit reads, resulting in _dramatically + * faster_ read speed. + * + * The check costs one initial branch per hash, which is generally + * negligible, but not zero. + * + * Moreover, it's not useful to generate an additional code path if memory + * access uses the same instruction for both aligned and unaligned + * addresses (e.g. x86 and aarch64). + * + * In these cases, the alignment check can be removed by setting this macro + * to 0. Then the code will always use unaligned memory access. Align check + * is automatically disabled on x86, x64 & arm64, which are platforms known + * to offer good unaligned memory accesses performance. + * + * This option does not affect XXH3 (only XXH32 and XXH64). + */ + #define XXH_FORCE_ALIGN_CHECK 0 + + /*! + * @def XXH_NO_INLINE_HINTS + * @brief When non-zero, sets all functions to `static`. + * + * By default, xxHash tries to force the compiler to inline almost all + * internal functions. + * + * This can usually improve performance due to reduced jumping and improved + * constant folding, but significantly increases the size of the binary + * which might not be favorable. + * + * Additionally, sometimes the forced inlining can be detrimental to + * performance, depending on the architecture. + * + * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the + * compiler full control on whether to inline or not. + * + * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using + * -fno-inline with GCC or Clang, this will automatically be defined. + */ + #define XXH_NO_INLINE_HINTS 0 + + /*! + * @def XXH_REROLL + * @brief Whether to reroll `XXH32_finalize`. + * + * For performance, `XXH32_finalize` uses an unrolled loop + * in the form of a switch statement. + * + * This is not always desirable, as it generates larger code, + * and depending on the architecture, may even be slower + * + * This is automatically defined with `-Os`/`-Oz` on GCC and Clang. + */ + #define XXH_REROLL 0 + + /*! + * @internal + * @brief Redefines old internal names. + * + * For compatibility with code that uses xxHash's internals before the names + * were changed to improve namespacing. There is no other reason to use + * this. + */ + #define XXH_OLD_NAMES + #undef XXH_OLD_NAMES /* don't actually use, it is ugly. */ + #endif /* XXH_DOXYGEN */ +/*! + * @} + */ + #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command \ line for example */ - #if !defined(__clang__) && defined(__GNUC__) && \ - defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && \ - (__ARM_ARCH == 6) - #define XXH_FORCE_MEMORY_ACCESS 2 - #elif !defined(__clang__) && \ - ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ - (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7))) + /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */ + #if !defined(__clang__) && \ + ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ + (defined(__GNUC__) && \ + ((defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \ + (defined(__mips__) && (__mips <= 5 || __mips_isa_rev < 6) && \ + (!defined(__mips16) || defined(__mips_mips16e2)))))) #define XXH_FORCE_MEMORY_ACCESS 1 #endif #endif - /*! - * XXH_ACCEPT_NULL_INPUT_POINTER: - * If the input pointer is NULL, xxHash's default behavior is to dereference - * it, triggering a segfault. When this macro is enabled, xxHash actively - * checks the input for a null pointer. If it is, the result for null input - * pointers is the same as a zero-length input. - */ #ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ #define XXH_ACCEPT_NULL_INPUT_POINTER 0 #endif - /*! - * XXH_FORCE_ALIGN_CHECK: - * This is an important performance trick - * for architectures without decent unaligned memory access performance. - * It checks for input alignment, and when conditions are met, - * uses a "fast path" employing direct 32-bit/64-bit read, - * resulting in _dramatically faster_ read speed. - * - * The check costs one initial branch per hash, which is generally negligible, - * but not zero. Moreover, it's not useful to generate binary for an - * additional code path if memory access uses same instruction for both - * aligned and unaligned adresses. - * - * In these cases, the alignment check can be removed by setting this macro to - * 0. Then the code will always use unaligned memory access. Align check is - * automatically disabled on x86, x64 & arm64, which are platforms known to - * offer good unaligned memory accesses performance. - * - * This option does not affect XXH3 (only XXH32 and XXH64). - */ #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ #if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || \ defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) /* visual */ @@ -928,25 +1465,6 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *data, size_t len, #endif #endif - /*! - * XXH_NO_INLINE_HINTS: - * - * By default, xxHash tries to force the compiler to inline almost all - * internal functions. - * - * This can usually improve performance due to reduced jumping and improved - * constant folding, but significantly increases the size of the binary which - * might not be favorable. - * - * Additionally, sometimes the forced inlining can be detrimental to - * performance, depending on the architecture. - * - * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the - * compiler full control on whether to inline or not. - * - * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using - * -fno-inline with GCC or Clang, this will automatically be defined. - */ #ifndef XXH_NO_INLINE_HINTS #if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \ || defined(__NO_INLINE__) /* -O0, -fno-inline */ @@ -956,44 +1474,57 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *data, size_t len, #endif #endif - /*! - * XXH_REROLL: - * Whether to reroll XXH32_finalize, and XXH64_finalize, - * instead of using an unrolled jump table/if statement loop. - * - * This is automatically defined on -Os/-Oz on GCC and Clang. - */ #ifndef XXH_REROLL - #if defined(__OPTIMIZE_SIZE__) + #if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ || \ + (defined(__GNUC__) && !defined(__clang__)) + /* The if/then loop is preferable to switch/case on gcc (on x64) */ #define XXH_REROLL 1 #else #define XXH_REROLL 0 #endif #endif + /*! + * @defgroup impl Implementation + * @{ + + */ + /* ************************************* * Includes & Memory related functions ***************************************/ - /*! + /* * Modify the local functions below should you wish to use * different memory routines for malloc() and free() */ #include +/*! + * @internal + * @brief Modify this function to use a different routine than malloc(). + */ static void *XXH_malloc(size_t s) { return malloc(s); } +/*! + * @internal + * @brief Modify this function to use a different routine than free(). + */ static void XXH_free(void *p) { free(p); } - /*! and for memcpy() */ #include + +/*! + * @internal + * @brief Modify this function to use a different routine than memcpy(). + */ static void *XXH_memcpy(void *dest, const void *src, size_t size) { return memcpy(dest, src, size); @@ -1037,7 +1568,11 @@ static void *XXH_memcpy(void *dest, const void *src, size_t size) { /* ************************************* * Debug ***************************************/ - /* + /*! + * @ingroup tuning + * @def XXH_DEBUGLEVEL + * @brief Sets the debugging level. + * * XXH_DEBUGLEVEL is expected to be defined externally, typically via the * compiler's command line options. The value must be a number. */ @@ -1057,12 +1592,58 @@ static void *XXH_memcpy(void *dest, const void *src, size_t size) { #endif /* note: use after variable declarations */ - #define XXH_STATIC_ASSERT(c) \ - do { \ - \ - enum { XXH_sa = 1 / (int)(!!(c)) }; \ - \ - } while (0) + #ifndef XXH_STATIC_ASSERT + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */ + #include + #define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \ + do { \ + \ + static_assert((c), m); \ + \ + } while (0) + #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */ + #define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \ + do { \ + \ + static_assert((c), m); \ + \ + } while (0) + #else + #define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \ + do { \ + \ + struct xxh_sa { \ + \ + char x[(c) ? 1 : -1]; \ + \ + }; \ + \ + } while (0) + #endif + #define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c), #c) + #endif + + /*! + * @internal + * @def XXH_COMPILER_GUARD(var) + * @brief Used to prevent unwanted optimizations for @p var. + * + * It uses an empty GCC inline assembly statement with a register constraint + * which forces @p var into a general purpose register (eg eax, ebx, ecx + * on x86) and marks it as modified. + * + * This is used in a few places to avoid unwanted autovectorization (e.g. + * XXH32_round()). All vectorization we want is explicit via intrinsics, + * and _usually_ isn't wanted elsewhere. + * + * We also use it to prevent unwanted constant folding for AArch64 in + * XXH3_initCustomSecret_scalar(). + */ + #ifdef __GNUC__ + #define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r"(var)) + #else + #define XXH_COMPILER_GUARD(var) ((void)0) + #endif /* ************************************* * Basic Types @@ -1085,6 +1666,56 @@ typedef XXH32_hash_t xxh_u32; /* *** Memory access *** */ +/*! + * @internal + * @fn xxh_u32 XXH_read32(const void* ptr) + * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness. + * + * Affected by @ref XXH_FORCE_MEMORY_ACCESS. + * + * @param ptr The pointer to read from. + * @return The 32-bit native endian integer from the bytes at @p ptr. + */ + +/*! + * @internal + * @fn xxh_u32 XXH_readLE32(const void* ptr) + * @brief Reads an unaligned 32-bit little endian integer from @p ptr. + * + * Affected by @ref XXH_FORCE_MEMORY_ACCESS. + * + * @param ptr The pointer to read from. + * @return The 32-bit little endian integer from the bytes at @p ptr. + */ + +/*! + * @internal + * @fn xxh_u32 XXH_readBE32(const void* ptr) + * @brief Reads an unaligned 32-bit big endian integer from @p ptr. + * + * Affected by @ref XXH_FORCE_MEMORY_ACCESS. + * + * @param ptr The pointer to read from. + * @return The 32-bit big endian integer from the bytes at @p ptr. + */ + +/*! + * @internal + * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align) + * @brief Like @ref XXH_readLE32(), but has an option for aligned reads. + * + * Affected by @ref XXH_FORCE_MEMORY_ACCESS. + * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is + * always @ref XXH_alignment::XXH_unaligned. + * + * @param ptr The pointer to read from. + * @param align Whether @p ptr is aligned. + * @pre + * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte + * aligned. + * @return The 32-bit little endian integer from the bytes at @p ptr. + */ + #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3)) /* * Manual byteshift. Best for old compilers which don't inline memcpy. @@ -1146,16 +1777,23 @@ static xxh_u32 XXH_read32(const void *memPtr) { #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ -/* *** Endianess *** */ -typedef enum { XXH_bigEndian = 0, XXH_littleEndian = 1 } XXH_endianess; + /* *** Endianness *** */ /*! - * XXH_CPU_LITTLE_ENDIAN: + * @ingroup tuning + * @def XXH_CPU_LITTLE_ENDIAN + * @brief Whether the target is little endian. + * * Defined to 1 if the target is little endian, or 0 if it is big endian. * It can be defined externally, for example on the compiler command line. * - * If it is not defined, a runtime check (which is usually constant folded) - * is used instead. + * If it is not defined, + * a runtime check (which is usually constant folded) is used instead. + * + * @note + * This is not necessarily defined to an integer constant. + * + * @see XXH_isLittleEndian() for the runtime check. */ #ifndef XXH_CPU_LITTLE_ENDIAN /* @@ -1170,8 +1808,11 @@ typedef enum { XXH_bigEndian = 0, XXH_littleEndian = 1 } XXH_endianess; (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #define XXH_CPU_LITTLE_ENDIAN 0 #else -/* - * runtime test, presumed to simplify to a constant by compiler +/*! + * @internal + * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN. + * + * Most compilers will constant fold this. */ static int XXH_isLittleEndian(void) { @@ -1189,7 +1830,7 @@ static int XXH_isLittleEndian(void) { return one.c[0]; } - +\ #define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() #endif #endif @@ -1205,6 +1846,19 @@ static int XXH_isLittleEndian(void) { #define XXH_HAS_BUILTIN(x) 0 #endif + /*! + * @internal + * @def XXH_rotl32(x,r) + * @brief 32-bit rotate left. + * + * @param x The 32-bit integer to be rotated. + * @param r The number of bits to rotate. + * @pre + * @p r > 0 && @p r < 32 + * @note + * @p x and @p r may be evaluated multiple times. + * @return The rotated result. + */ #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) && \ XXH_HAS_BUILTIN(__builtin_rotateleft64) #define XXH_rotl32 __builtin_rotateleft32 @@ -1219,6 +1873,14 @@ static int XXH_isLittleEndian(void) { #define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r)))) #endif + /*! + * @internal + * @fn xxh_u32 XXH_swap32(xxh_u32 x) + * @brief A 32-bit byteswap. + * + * @param x The 32-bit integer to byteswap. + * @return @p x, byteswapped. + */ #if defined(_MSC_VER) /* Visual Studio */ #define XXH_swap32 _byteswap_ulong #elif XXH_GCC_VERSION >= 403 @@ -1236,7 +1898,17 @@ static xxh_u32 XXH_swap32(xxh_u32 x) { /* *************************** * Memory reads *****************************/ -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +/*! + * @internal + * @brief Enum to indicate whether a pointer is aligned. + */ +typedef enum { + + XXH_aligned, /*!< Aligned */ + XXH_unaligned /*!< Possibly unaligned */ + +} XXH_alignment; /* * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. @@ -1295,6 +1967,7 @@ XXH_FORCE_INLINE xxh_u32 XXH_readLE32_align(const void * ptr, /* ************************************* * Misc ***************************************/ +/*! @ingroup public */ XXH_PUBLIC_API unsigned XXH_versionNumber(void) { return XXH_VERSION_NUMBER; @@ -1304,16 +1977,19 @@ XXH_PUBLIC_API unsigned XXH_versionNumber(void) { /* ******************************************************************* * 32-bit hash functions *********************************************************************/ -static const xxh_u32 XXH_PRIME32_1 = - 0x9E3779B1U; /* 0b10011110001101110111100110110001 */ -static const xxh_u32 XXH_PRIME32_2 = - 0x85EBCA77U; /* 0b10000101111010111100101001110111 */ -static const xxh_u32 XXH_PRIME32_3 = - 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */ -static const xxh_u32 XXH_PRIME32_4 = - 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */ -static const xxh_u32 XXH_PRIME32_5 = - 0x165667B1U; /* 0b00010110010101100110011110110001 */ +/*! + * @} + * @defgroup xxh32_impl XXH32 implementation + * @ingroup impl + * @{ + + */ +/* #define instead of static const, to be used as initializers */ + #define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */ + #define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */ + #define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */ + #define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */ + #define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */ #ifdef XXH_OLD_NAMES #define PRIME32_1 XXH_PRIME32_1 @@ -1323,19 +1999,29 @@ static const xxh_u32 XXH_PRIME32_5 = #define PRIME32_5 XXH_PRIME32_5 #endif +/*! + * @internal + * @brief Normal stripe processing routine. + * + * This shuffles the bits so that any bit from @p input impacts several bits in + * @p acc. + * + * @param acc The accumulator lane. + * @param input The stripe of input to mix. + * @return The mixed accumulator lane. + */ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) { acc += input * XXH_PRIME32_2; acc = XXH_rotl32(acc, 13); acc *= XXH_PRIME32_1; - #if defined(__GNUC__) && defined(__SSE4_1__) && \ + #if (defined(__SSE4_1__) || defined(__aarch64__)) && \ !defined(XXH_ENABLE_AUTOVECTORIZE) /* * UGLY HACK: - * This inline assembly hack forces acc into a normal register. This is the - * only thing that prevents GCC and Clang from autovectorizing the XXH32 - * loop (pragmas and attributes don't work for some resason) without globally - * disabling SSE4.1. + * A compiler fence is the only thing that prevents GCC and Clang from + * autovectorizing the XXH32 loop (pragmas and attributes don't work for some + * reason) without globally disabling SSE4.1. * * The reason we want to avoid vectorization is because despite working on * 4 integers at a time, there are multiple factors slowing XXH32 down on @@ -1360,28 +2046,26 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) { * can load data, while v3 can multiply. SSE forces them to operate * together. * - * How this hack works: - * __asm__("" // Declare an assembly block but don't declare any - * instructions : // However, as an Input/Output Operand, - * "+r" // constrain a read/write operand (+) as a general purpose - * register (r). (acc) // and set acc as the operand - * ); - * - * Because of the 'r', the compiler has promised that seed will be in a - * general purpose register and the '+' says that it will be 'read/write', - * so it has to assume it has changed. It is like volatile without all the - * loads and stores. - * - * Since the argument has to be in a normal register (not an SSE register), - * each time XXH32_round is called, it is impossible to vectorize. + * This is also enabled on AArch64, as Clang autovectorizes it incorrectly + * and it is pointless writing a NEON implementation that is basically the + * same speed as scalar for XXH32. */ - __asm__("" : "+r"(acc)); + XXH_COMPILER_GUARD(acc); #endif return acc; } -/* mix all bits */ +/*! + * @internal + * @brief Mixes all bits to finalize the hash. + * + * The final mix ensures that all input bits have a chance to impact any bit in + * the output digest, resulting in an unbiased distribution. + * + * @param h32 The hash to avalanche. + * @return The avalanched hash. + */ static xxh_u32 XXH32_avalanche(xxh_u32 h32) { h32 ^= h32 >> 15; @@ -1395,11 +2079,23 @@ static xxh_u32 XXH32_avalanche(xxh_u32 h32) { #define XXH_get32bits(p) XXH_readLE32_align(p, align) +/*! + * @internal + * @brief Processes the last 0-15 bytes of @p ptr. + * + * There may be up to 15 bytes remaining to consume from the input. + * This final stage will digest them to ensure that all input bytes are present + * in the final mix. + * + * @param h32 The hash to finalize. + * @param ptr The pointer to the remaining input. + * @param len The remaining length, modulo 16. + * @param align Whether @p ptr is aligned. + * @return The finalized hash. + */ static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len, XXH_alignment align) { - - /* dummy comment */ - +\ #define XXH_PROCESS1 \ do { \ \ @@ -1443,20 +2139,20 @@ static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len, case 12: XXH_PROCESS4; - /* fallthrough */ + XXH_FALLTHROUGH; case 8: XXH_PROCESS4; - /* fallthrough */ + XXH_FALLTHROUGH; case 4: XXH_PROCESS4; return XXH32_avalanche(h32); case 13: XXH_PROCESS4; - /* fallthrough */ + XXH_FALLTHROUGH; case 9: XXH_PROCESS4; - /* fallthrough */ + XXH_FALLTHROUGH; case 5: XXH_PROCESS4; XXH_PROCESS1; @@ -1464,10 +2160,10 @@ static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len, case 14: XXH_PROCESS4; - /* fallthrough */ + XXH_FALLTHROUGH; case 10: XXH_PROCESS4; - /* fallthrough */ + XXH_FALLTHROUGH; case 6: XXH_PROCESS4; XXH_PROCESS1; @@ -1476,22 +2172,22 @@ static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len, case 15: XXH_PROCESS4; - /* fallthrough */ + XXH_FALLTHROUGH; case 11: XXH_PROCESS4; - /* fallthrough */ + XXH_FALLTHROUGH; case 7: XXH_PROCESS4; - /* fallthrough */ + XXH_FALLTHROUGH; case 3: XXH_PROCESS1; - /* fallthrough */ + XXH_FALLTHROUGH; case 2: XXH_PROCESS1; - /* fallthrough */ + XXH_FALLTHROUGH; case 1: XXH_PROCESS1; - /* fallthrough */ + XXH_FALLTHROUGH; case 0: return XXH32_avalanche(h32); @@ -1512,10 +2208,18 @@ static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len, #undef XXH_PROCESS4 #endif +/*! + * @internal + * @brief The implementation for @ref XXH32(). + * + * @param input, len, seed Directly passed from @ref XXH32(). + * @param align Whether @p input is aligned. + * @return The calculated hash. + */ XXH_FORCE_INLINE xxh_u32 XXH32_endian_align(const xxh_u8 *input, size_t len, xxh_u32 seed, XXH_alignment align) { - const xxh_u8 *bEnd = input + len; + const xxh_u8 *bEnd = input ? input + len : NULL; xxh_u32 h32; #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \ @@ -1565,6 +2269,7 @@ XXH_FORCE_INLINE xxh_u32 XXH32_endian_align(const xxh_u8 *input, size_t len, } +/*! @ingroup xxh32_family */ XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t len, XXH32_hash_t seed) { @@ -1574,9 +2279,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t len, XXH32_reset(&state, seed); XXH32_update(&state, (const xxh_u8*)input, len); return XXH32_digest(&state); - #else - if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 3) == @@ -1593,13 +2296,16 @@ XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t len, } /******* Hash streaming *******/ - +/*! + * @ingroup xxh32_family + */ XXH_PUBLIC_API XXH32_state_t *XXH32_createState(void) { return (XXH32_state_t *)XXH_malloc(sizeof(XXH32_state_t)); } +/*! @ingroup xxh32_family */ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr) { XXH_free(statePtr); @@ -1607,6 +2313,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr) { } +/*! @ingroup xxh32_family */ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t * dstState, const XXH32_state_t *srcState) { @@ -1614,6 +2321,7 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t * dstState, } +/*! @ingroup xxh32_family */ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t *statePtr, XXH32_hash_t seed) { @@ -1630,6 +2338,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t *statePtr, } +/*! @ingroup xxh32_family */ XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t *state, const void *input, size_t len) { @@ -1719,6 +2428,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t *state, } +/*! @ingroup xxh32_family */ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *state) { xxh_u32 h32; @@ -1743,7 +2453,8 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *state) { /******* Canonical representation *******/ -/* +/*! + * @ingroup xxh32_family * The default return values from XXH functions are unsigned 32 and 64 bit * integers. * @@ -1765,6 +2476,7 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t *dst, } +/*! @ingroup xxh32_family */ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t *src) { @@ -1777,7 +2489,12 @@ XXH32_hashFromCanonical(const XXH32_canonical_t *src) { /* ******************************************************************* * 64-bit hash functions *********************************************************************/ +/*! + * @} + * @ingroup impl + * @{ + */ /******* Memory access *******/ typedef XXH64_hash_t xxh_u64; @@ -1786,40 +2503,6 @@ typedef XXH64_hash_t xxh_u64; #define U64 xxh_u64 #endif - /*! - * XXH_REROLL_XXH64: - * Whether to reroll the XXH64_finalize() loop. - * - * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a - * performance gain on 64-bit hosts, as only one jump is required. - * - * However, on 32-bit hosts, because arithmetic needs to be done with two - * 32-bit registers, and 64-bit arithmetic needs to be simulated, it isn't - * beneficial to unroll. The code becomes ridiculously large (the largest - * function in the binary on i386!), and rerolling it saves anywhere from - * 3kB to 20kB. It is also slightly faster because it fits into cache better - * and is more likely to be inlined by the compiler. - * - * If XXH_REROLL is defined, this is ignored and the loop is always - * rerolled. - */ - #ifndef XXH_REROLL_XXH64 - #if (defined(__ILP32__) || \ - defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \ - || !(defined(__x86_64__) || defined(_M_X64) || \ - defined(_M_AMD64) /* x86-64 */ \ - || defined(_M_ARM64) || defined(__aarch64__) || \ - defined(__arm64__) /* aarch64 */ \ - || defined(__PPC64__) || defined(__PPC64LE__) || \ - defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \ - || defined(__mips64__) || defined(__mips64)) /* mips64 */ \ - || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */ - #define XXH_REROLL_XXH64 1 - #else - #define XXH_REROLL_XXH64 0 - #endif - #endif /* !defined(XXH_REROLL_XXH64) */ - #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3)) /* * Manual byteshift. Best for old compilers which don't inline memcpy. @@ -1950,23 +2633,35 @@ XXH_FORCE_INLINE xxh_u64 XXH_readLE64_align(const void * ptr, } -/******* xxh64 *******/ + /******* xxh64 *******/ + /*! + * @} + * @defgroup xxh64_impl XXH64 implementation + * @ingroup impl + * @{ -static const xxh_u64 XXH_PRIME64_1 = - 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 - */ -static const xxh_u64 XXH_PRIME64_2 = - 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 - */ -static const xxh_u64 XXH_PRIME64_3 = - 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 - */ -static const xxh_u64 XXH_PRIME64_4 = - 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 - */ -static const xxh_u64 XXH_PRIME64_5 = - 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 - */ + */ + /* #define rather that static const, to be used as initializers */ + #define XXH_PRIME64_1 \ + 0x9E3779B185EBCA87ULL /*!< \ + 0b1001111000110111011110011011000110000101111010111100101010000111 \ + */ + #define XXH_PRIME64_2 \ + 0xC2B2AE3D27D4EB4FULL /*!< \ + 0b1100001010110010101011100011110100100111110101001110101101001111 \ + */ + #define XXH_PRIME64_3 \ + 0x165667B19E3779F9ULL /*!< \ + 0b0001011001010110011001111011000110011110001101110111100111111001 \ + */ + #define XXH_PRIME64_4 \ + 0x85EBCA77C2B2AE63ULL /*!< \ + 0b1000010111101011110010100111011111000010101100101010111001100011 \ + */ + #define XXH_PRIME64_5 \ + 0x27D4EB2F165667C5ULL /*!< \ + 0b0010011111010100111010110010111100010110010101100110011111000101 \ + */ #ifdef XXH_OLD_NAMES #define PRIME64_1 XXH_PRIME64_1 @@ -2010,185 +2705,35 @@ static xxh_u64 XXH64_avalanche(xxh_u64 h64) { static xxh_u64 XXH64_finalize(xxh_u64 h64, const xxh_u8 *ptr, size_t len, XXH_alignment align) { - /* dummy comment */ - - #define XXH_PROCESS1_64 \ - do { \ - \ - h64 ^= (*ptr++) * XXH_PRIME64_5; \ - h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; \ - \ - } while (0) - - #define XXH_PROCESS4_64 \ - do { \ - \ - h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1; \ - ptr += 4; \ - h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; \ - \ - } while (0) - - #define XXH_PROCESS8_64 \ - do { \ - \ - xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \ - ptr += 8; \ - h64 ^= k1; \ - h64 = XXH_rotl64(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4; \ - \ - } while (0) - - /* Rerolled version for 32-bit targets is faster and much smaller. */ - if (XXH_REROLL || XXH_REROLL_XXH64) { - - len &= 31; - while (len >= 8) { - - XXH_PROCESS8_64; - len -= 8; - - } - - if (len >= 4) { - - XXH_PROCESS4_64; - len -= 4; - - } - - while (len > 0) { + len &= 31; + while (len >= 8) { - XXH_PROCESS1_64; - --len; + xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); + ptr += 8; + h64 ^= k1; + h64 = XXH_rotl64(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4; + len -= 8; - } + } - return XXH64_avalanche(h64); + if (len >= 4) { - } else { + h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1; + ptr += 4; + h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; + len -= 4; - switch (len & 31) { + } - case 24: - XXH_PROCESS8_64; - /* fallthrough */ - case 16: - XXH_PROCESS8_64; - /* fallthrough */ - case 8: - XXH_PROCESS8_64; - return XXH64_avalanche(h64); - - case 28: - XXH_PROCESS8_64; - /* fallthrough */ - case 20: - XXH_PROCESS8_64; - /* fallthrough */ - case 12: - XXH_PROCESS8_64; - /* fallthrough */ - case 4: - XXH_PROCESS4_64; - return XXH64_avalanche(h64); - - case 25: - XXH_PROCESS8_64; - /* fallthrough */ - case 17: - XXH_PROCESS8_64; - /* fallthrough */ - case 9: - XXH_PROCESS8_64; - XXH_PROCESS1_64; - return XXH64_avalanche(h64); - - case 29: - XXH_PROCESS8_64; - /* fallthrough */ - case 21: - XXH_PROCESS8_64; - /* fallthrough */ - case 13: - XXH_PROCESS8_64; - /* fallthrough */ - case 5: - XXH_PROCESS4_64; - XXH_PROCESS1_64; - return XXH64_avalanche(h64); - - case 26: - XXH_PROCESS8_64; - /* fallthrough */ - case 18: - XXH_PROCESS8_64; - /* fallthrough */ - case 10: - XXH_PROCESS8_64; - XXH_PROCESS1_64; - XXH_PROCESS1_64; - return XXH64_avalanche(h64); - - case 30: - XXH_PROCESS8_64; - /* fallthrough */ - case 22: - XXH_PROCESS8_64; - /* fallthrough */ - case 14: - XXH_PROCESS8_64; - /* fallthrough */ - case 6: - XXH_PROCESS4_64; - XXH_PROCESS1_64; - XXH_PROCESS1_64; - return XXH64_avalanche(h64); - - case 27: - XXH_PROCESS8_64; - /* fallthrough */ - case 19: - XXH_PROCESS8_64; - /* fallthrough */ - case 11: - XXH_PROCESS8_64; - XXH_PROCESS1_64; - XXH_PROCESS1_64; - XXH_PROCESS1_64; - return XXH64_avalanche(h64); - - case 31: - XXH_PROCESS8_64; - /* fallthrough */ - case 23: - XXH_PROCESS8_64; - /* fallthrough */ - case 15: - XXH_PROCESS8_64; - /* fallthrough */ - case 7: - XXH_PROCESS4_64; - /* fallthrough */ - case 3: - XXH_PROCESS1_64; - /* fallthrough */ - case 2: - XXH_PROCESS1_64; - /* fallthrough */ - case 1: - XXH_PROCESS1_64; - /* fallthrough */ - case 0: - return XXH64_avalanche(h64); + while (len > 0) { - } + h64 ^= (*ptr++) * XXH_PRIME64_5; + h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; + --len; } - /* impossible to reach */ - XXH_ASSERT(0); - return 0; /* unreachable, but some compilers complain without it */ + return XXH64_avalanche(h64); } @@ -2205,7 +2750,7 @@ static xxh_u64 XXH64_finalize(xxh_u64 h64, const xxh_u8 *ptr, size_t len, XXH_FORCE_INLINE xxh_u64 XXH64_endian_align(const xxh_u8 *input, size_t len, xxh_u64 seed, XXH_alignment align) { - const xxh_u8 *bEnd = input + len; + const xxh_u8 *bEnd = input ? input + len : NULL; xxh_u64 h64; #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \ @@ -2259,6 +2804,7 @@ XXH_FORCE_INLINE xxh_u64 XXH64_endian_align(const xxh_u8 *input, size_t len, } +/*! @ingroup xxh64_family */ XXH_PUBLIC_API XXH64_hash_t XXH64(const void *input, size_t len, XXH64_hash_t seed) { @@ -2268,9 +2814,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64(const void *input, size_t len, XXH64_reset(&state, seed); XXH64_update(&state, (const xxh_u8*)input, len); return XXH64_digest(&state); - #else - if (XXH_FORCE_ALIGN_CHECK) { if ((((size_t)input) & 7) == @@ -2289,12 +2833,14 @@ XXH_PUBLIC_API XXH64_hash_t XXH64(const void *input, size_t len, /******* Hash Streaming *******/ +/*! @ingroup xxh64_family*/ XXH_PUBLIC_API XXH64_state_t *XXH64_createState(void) { return (XXH64_state_t *)XXH_malloc(sizeof(XXH64_state_t)); } +/*! @ingroup xxh64_family */ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr) { XXH_free(statePtr); @@ -2302,6 +2848,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr) { } +/*! @ingroup xxh64_family */ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t * dstState, const XXH64_state_t *srcState) { @@ -2309,6 +2856,7 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t * dstState, } +/*! @ingroup xxh64_family */ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t *statePtr, XXH64_hash_t seed) { @@ -2325,6 +2873,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t *statePtr, } +/*! @ingroup xxh64_family */ XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH64_state_t *state, const void *input, size_t len) { @@ -2403,6 +2952,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH64_state_t *state, } +/*! @ingroup xxh64_family */ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t *state) { xxh_u64 h64; @@ -2436,6 +2986,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t *state) { /******* Canonical representation *******/ +/*! @ingroup xxh64_family */ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t *dst, XXH64_hash_t hash) { @@ -2445,6 +2996,7 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t *dst, } +/*! @ingroup xxh64_family */ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t *src) { @@ -2452,380 +3004,452 @@ XXH64_hashFromCanonical(const XXH64_canonical_t *src) { } - /* ********************************************************************* - * XXH3 - * New generation hash designed for speed on small keys and vectorization - ************************************************************************ */ + #ifndef XXH_NO_XXH3 - /* === Compiler specifics === */ + /* ********************************************************************* + * XXH3 + * New generation hash designed for speed on small keys and vectorization + ************************************************************************ */ + /*! + * @} + * @defgroup xxh3_impl XXH3 implementation + * @ingroup impl + * @{ - #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ - #define XXH_RESTRICT restrict - #else - /* Note: it might be useful to define __restrict or __restrict__ for some - * C++ compilers */ - #define XXH_RESTRICT /* disable */ - #endif + */ - #if (defined(__GNUC__) && (__GNUC__ >= 3)) || \ - (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || \ - defined(__clang__) - #define XXH_likely(x) __builtin_expect(x, 1) - #define XXH_unlikely(x) __builtin_expect(x, 0) - #else - #define XXH_likely(x) (x) - #define XXH_unlikely(x) (x) - #endif + /* === Compiler specifics === */ - #if defined(__GNUC__) - #if defined(__AVX2__) - #include - #elif defined(__SSE2__) - #include - #elif defined(__ARM_NEON__) || defined(__ARM_NEON) - #define inline __inline__ /* circumvent a clang bug */ - #include - #undef inline + #if ((defined(sun) || defined(__sun)) && \ + __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested \ + with GCC 5.5 */ + #define XXH_RESTRICT /* disable */ + #elif defined(__STDC_VERSION__) && \ + __STDC_VERSION__ >= 199901L /* >= C99 */ + #define XXH_RESTRICT restrict + #else + /* Note: it might be useful to define __restrict or __restrict__ for + * some C++ compilers */ + #define XXH_RESTRICT /* disable */ #endif - #elif defined(_MSC_VER) - #include - #endif - /* - * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while - * remaining a true 64-bit/128-bit hash function. - * - * This is done by prioritizing a subset of 64-bit operations that can be - * emulated without too many steps on the average 32-bit machine. - * - * For example, these two lines seem similar, and run equally fast on - * 64-bit: - * - * xxh_u64 x; - * x ^= (x >> 47); // good - * x ^= (x >> 13); // bad - * - * However, to a 32-bit machine, there is a major difference. - * - * x ^= (x >> 47) looks like this: - * - * x.lo ^= (x.hi >> (47 - 32)); - * - * while x ^= (x >> 13) looks like this: - * - * // note: funnel shifts are not usually cheap. - * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13)); - * x.hi ^= (x.hi >> 13); - * - * The first one is significantly faster than the second, simply because the - * shift is larger than 32. This means: - * - All the bits we need are in the upper 32 bits, so we can ignore the - * lower 32 bits in the shift. - * - The shift result will always fit in the lower 32 bits, and therefore, - * we can ignore the upper 32 bits in the xor. - * - * Thanks to this optimization, XXH3 only requires these features to be - * efficient: - * - * - Usable unaligned access - * - A 32-bit or 64-bit ALU - * - If 32-bit, a decent ADC instruction - * - A 32 or 64-bit multiply with a 64-bit result - * - For the 128-bit variant, a decent byteswap helps short inputs. - * - * The first two are already required by XXH32, and almost all 32-bit and - * 64-bit platforms which can run XXH32 can run XXH3 efficiently. - * - * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one - * notable exception. - * - * First of all, Thumb-1 lacks support for the UMULL instruction which - * performs the important long multiply. This means numerous __aeabi_lmul - * calls. - * - * Second of all, the 8 functional registers are just not enough. - * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic - * need Lo registers, and this shuffling results in thousands more MOVs than - * A32. - * - * A32 and T32 don't have this limitation. They can access all 14 registers, - * do a 32->64 multiply with UMULL, and the flexible operand allowing free - * shifts is helpful, too. - * - * Therefore, we do a quick sanity check. - * - * If compiling Thumb-1 for a target which supports ARM instructions, we - * will emit a warning, as it is not a "sane" platform to compile for. - * - * Usually, if this happens, it is because of an accident and you probably - * need to specify -march, as you likely meant to compile for a newer - * architecture. - * - * Credit: large sections of the vectorial and asm source code paths - * have been contributed by @easyaspi314 - */ - #if defined(__thumb__) && !defined(__thumb2__) && \ - defined(__ARM_ARCH_ISA_ARM) - #warning "XXH3 is highly inefficient without ARM or Thumb-2." - #endif + #if (defined(__GNUC__) && (__GNUC__ >= 3)) || \ + (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || \ + defined(__clang__) + #define XXH_likely(x) __builtin_expect(x, 1) + #define XXH_unlikely(x) __builtin_expect(x, 0) + #else + #define XXH_likely(x) (x) + #define XXH_unlikely(x) (x) + #endif + + #if defined(__GNUC__) + #if defined(__AVX2__) + #include + #elif defined(__SSE2__) + #include + #elif defined(__ARM_NEON__) || defined(__ARM_NEON) + #define inline __inline__ /* circumvent a clang bug */ + #include + #undef inline + #endif + #elif defined(_MSC_VER) + #include + #endif + + /* + * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while + * remaining a true 64-bit/128-bit hash function. + * + * This is done by prioritizing a subset of 64-bit operations that can be + * emulated without too many steps on the average 32-bit machine. + * + * For example, these two lines seem similar, and run equally fast on + * 64-bit: + * + * xxh_u64 x; + * x ^= (x >> 47); // good + * x ^= (x >> 13); // bad + * + * However, to a 32-bit machine, there is a major difference. + * + * x ^= (x >> 47) looks like this: + * + * x.lo ^= (x.hi >> (47 - 32)); + * + * while x ^= (x >> 13) looks like this: + * + * // note: funnel shifts are not usually cheap. + * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13)); + * x.hi ^= (x.hi >> 13); + * + * The first one is significantly faster than the second, simply because + * the shift is larger than 32. This means: + * - All the bits we need are in the upper 32 bits, so we can ignore the + * lower 32 bits in the shift. + * - The shift result will always fit in the lower 32 bits, and + * therefore, we can ignore the upper 32 bits in the xor. + * + * Thanks to this optimization, XXH3 only requires these features to be + * efficient: + * + * - Usable unaligned access + * - A 32-bit or 64-bit ALU + * - If 32-bit, a decent ADC instruction + * - A 32 or 64-bit multiply with a 64-bit result + * - For the 128-bit variant, a decent byteswap helps short inputs. + * + * The first two are already required by XXH32, and almost all 32-bit and + * 64-bit platforms which can run XXH32 can run XXH3 efficiently. + * + * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is + * one notable exception. + * + * First of all, Thumb-1 lacks support for the UMULL instruction which + * performs the important long multiply. This means numerous __aeabi_lmul + * calls. + * + * Second of all, the 8 functional registers are just not enough. + * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic + * need Lo registers, and this shuffling results in thousands more MOVs + * than A32. + * + * A32 and T32 don't have this limitation. They can access all 14 + * registers, do a 32->64 multiply with UMULL, and the flexible operand + * allowing free shifts is helpful, too. + * + * Therefore, we do a quick sanity check. + * + * If compiling Thumb-1 for a target which supports ARM instructions, we + * will emit a warning, as it is not a "sane" platform to compile for. + * + * Usually, if this happens, it is because of an accident and you probably + * need to specify -march, as you likely meant to compile for a newer + * architecture. + * + * Credit: large sections of the vectorial and asm source code paths + * have been contributed by @easyaspi314 + */ + #if defined(__thumb__) && !defined(__thumb2__) && \ + defined(__ARM_ARCH_ISA_ARM) + #warning "XXH3 is highly inefficient without ARM or Thumb-2." + #endif /* ========================================== * Vectorization detection * ========================================== */ - #define XXH_SCALAR 0 /* Portable scalar version */ - #define XXH_SSE2 1 /* SSE2 for Pentium 4 and all x86_64 */ - #define XXH_AVX2 2 /* AVX2 for Haswell and Bulldozer */ - #define XXH_AVX512 3 /* AVX512 for Skylake and Icelake */ - #define XXH_NEON 4 /* NEON for most ARMv7-A and all AArch64 */ - #define XXH_VSX 5 /* VSX and ZVector for POWER8/z13 */ - - #ifndef XXH_VECTOR /* can be defined on command line */ - #if defined(__AVX512F__) - #define XXH_VECTOR XXH_AVX512 - #elif defined(__AVX2__) - #define XXH_VECTOR XXH_AVX2 - #elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || \ - (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) - #define XXH_VECTOR XXH_SSE2 - #elif defined(__GNUC__) /* msvc support maybe later */ \ - && (defined(__ARM_NEON__) || defined(__ARM_NEON)) && \ - (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \ - || (defined(__BYTE_ORDER__) && \ - __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) - #define XXH_VECTOR XXH_NEON - #elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) || \ - (defined(__s390x__) && defined(__VEC__)) && \ - defined(__GNUC__) /* TODO: IBM XL */ - #define XXH_VECTOR XXH_VSX - #else + + #ifdef XXH_DOXYGEN + /*! + * @ingroup tuning + * @brief Overrides the vectorization implementation chosen for XXH3. + * + * Can be defined to 0 to disable SIMD or any of the values mentioned in + * @ref XXH_VECTOR_TYPE. + * + * If this is not defined, it uses predefined macros to determine the + * best implementation. + */ #define XXH_VECTOR XXH_SCALAR - #endif - #endif +/*! + * @ingroup tuning + * @brief Possible values for @ref XXH_VECTOR. + * + * Note that these are actually implemented as macros. + * + * If this is not defined, it is detected automatically. + * @ref XXH_X86DISPATCH overrides this. + */ +enum XXH_VECTOR_TYPE /* fake enum */ { + + XXH_SCALAR = 0, /*!< Portable scalar version */ + XXH_SSE2 = 1, /*!< + * SSE2 for Pentium 4, Opteron, all x86_64. + * + * @note SSE2 is also guaranteed on Windows 10, macOS, and + * Android x86. + */ + XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */ + XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */ + XXH_NEON = 4, /*!< NEON for most ARMv7-A and all AArch64 */ + XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */ - /* - * Controls the alignment of the accumulator, - * for compatibility with aligned vector loads, which are usually faster. - */ - #ifndef XXH_ACC_ALIGN - #if defined(XXH_X86DISPATCH) - #define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */ - #elif XXH_VECTOR == XXH_SCALAR /* scalar */ +}; + + /*! + * @ingroup tuning + * @brief Selects the minimum alignment for XXH3's accumulators. + * + * When using SIMD, this should match the alignment reqired for said + * vector type, so, for example, 32 for AVX2. + * + * Default: Auto detected. + */ #define XXH_ACC_ALIGN 8 - #elif XXH_VECTOR == XXH_SSE2 /* sse2 */ - #define XXH_ACC_ALIGN 16 - #elif XXH_VECTOR == XXH_AVX2 /* avx2 */ - #define XXH_ACC_ALIGN 32 - #elif XXH_VECTOR == XXH_NEON /* neon */ - #define XXH_ACC_ALIGN 16 - #elif XXH_VECTOR == XXH_VSX /* vsx */ - #define XXH_ACC_ALIGN 16 - #elif XXH_VECTOR == XXH_AVX512 /* avx512 */ - #define XXH_ACC_ALIGN 64 #endif - #endif - #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 || \ - XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512 - #define XXH_SEC_ALIGN XXH_ACC_ALIGN - #else - #define XXH_SEC_ALIGN 8 - #endif + /* Actual definition */ + #ifndef XXH_DOXYGEN + #define XXH_SCALAR 0 + #define XXH_SSE2 1 + #define XXH_AVX2 2 + #define XXH_AVX512 3 + #define XXH_NEON 4 + #define XXH_VSX 5 + #endif + + #ifndef XXH_VECTOR /* can be defined on command line */ + #if defined(__AVX512F__) + #define XXH_VECTOR XXH_AVX512 + #elif defined(__AVX2__) + #define XXH_VECTOR XXH_AVX2 + #elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) + #define XXH_VECTOR XXH_SSE2 + #elif defined(__GNUC__) /* msvc support maybe later */ \ + && (defined(__ARM_NEON__) || defined(__ARM_NEON)) && \ + (defined( \ + __LITTLE_ENDIAN__) /* We only support little endian NEON */ \ + || (defined(__BYTE_ORDER__) && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) + #define XXH_VECTOR XXH_NEON + #elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) || \ + (defined(__s390x__) && defined(__VEC__)) && \ + defined(__GNUC__) /* TODO: IBM XL */ + #define XXH_VECTOR XXH_VSX + #else + #define XXH_VECTOR XXH_SCALAR + #endif + #endif + + /* + * Controls the alignment of the accumulator, + * for compatibility with aligned vector loads, which are usually faster. + */ + #ifndef XXH_ACC_ALIGN + #if defined(XXH_X86DISPATCH) + #define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */ + #elif XXH_VECTOR == XXH_SCALAR /* scalar */ + #define XXH_ACC_ALIGN 8 + #elif XXH_VECTOR == XXH_SSE2 /* sse2 */ + #define XXH_ACC_ALIGN 16 + #elif XXH_VECTOR == XXH_AVX2 /* avx2 */ + #define XXH_ACC_ALIGN 32 + #elif XXH_VECTOR == XXH_NEON /* neon */ + #define XXH_ACC_ALIGN 16 + #elif XXH_VECTOR == XXH_VSX /* vsx */ + #define XXH_ACC_ALIGN 16 + #elif XXH_VECTOR == XXH_AVX512 /* avx512 */ + #define XXH_ACC_ALIGN 64 + #endif + #endif - /* - * UGLY HACK: - * GCC usually generates the best code with -O3 for xxHash. - * - * However, when targeting AVX2, it is overzealous in its unrolling - * resulting in code roughly 3/4 the speed of Clang. - * - * There are other issues, such as GCC splitting _mm256_loadu_si256 into - * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which - * only applies to Sandy and Ivy Bridge... which don't even support AVX2. - * - * That is why when compiling the AVX2 version, it is recommended to use - * either -O2 -mavx2 -march=haswell or -O2 -mavx2 - * -mno-avx256-split-unaligned-load for decent performance, or to use Clang - * instead. - * - * Fortunately, we can control the first one with a pragma that forces GCC - * into -O2, but the other one we can't control without "failed to inline - * always inline function due to target mismatch" warnings. - */ - #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ - && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ - && defined(__OPTIMIZE__) && \ - !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ - #pragma GCC push_options - #pragma GCC optimize("-O2") - #endif + #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 || \ + XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512 + #define XXH_SEC_ALIGN XXH_ACC_ALIGN + #else + #define XXH_SEC_ALIGN 8 + #endif - #if XXH_VECTOR == XXH_NEON /* - * NEON's setup for vmlal_u32 is a little more complicated than it is on - * SSE2, AVX2, and VSX. - * - * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an - * upcast. - * - * To do the same operation, the 128-bit 'Q' register needs to be split - * into two 64-bit 'D' registers, performing this operation:: - * - * [ a | b ] | - * '---------. .--------' | | x | - * | .---------' '--------. | - * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32 ] - * - * Due to significant changes in aarch64, the fastest method for aarch64 - * is completely different than the fastest method for ARMv7-A. - * - * ARMv7-A treats D registers as unions overlaying Q registers, so - * modifying D11 will modify the high half of Q5. This is similar to how - * modifying AH will only affect bits 8-15 of AX on x86. - * - * VZIP takes two registers, and puts even lanes in one register and odd - * lanes in the other. - * - * On ARMv7-A, this strangely modifies both parameters in place instead of - * taking the usual 3-operand form. - * - * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on - * the lower and upper halves of the Q register to end up with the high - * and low halves where we want - all in one instruction. - * - * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = { d10[1], - * d11[1] } - * - * Unfortunately we need inline assembly for this: Instructions modifying - * two registers at once is not possible in GCC or Clang's IR, and they - * have to create a copy. - * - * aarch64 requires a different approach. - * - * In order to make it easier to write a decent compiler for aarch64, many - * quirks were removed, such as conditional execution. - * - * NEON was also affected by this. - * - * aarch64 cannot access the high bits of a Q-form register, and writes to - * a D-form register zero the high bits, similar to how writes to W-form - * scalar registers (or DWORD registers on x86_64) work. - * - * The formerly free vget_high intrinsics now require a vext (with a few - * exceptions) - * - * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the - * equivalent of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to - * only modify one operand. - * - * The equivalent of the VZIP.32 on the lower and upper halves would be - * this mess: + * UGLY HACK: + * GCC usually generates the best code with -O3 for xxHash. * - * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] - * } zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] } zip2 v0.2s, - * v0.2s, v1.2s // v0 = { v0[1], v2[1] } + * However, when targeting AVX2, it is overzealous in its unrolling + * resulting in code roughly 3/4 the speed of Clang. * - * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 - * (SHRN): + * There are other issues, such as GCC splitting _mm256_loadu_si256 into + * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization + * which only applies to Sandy and Ivy Bridge... which don't even support + * AVX2. * - * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32); - * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF); + * That is why when compiling the AVX2 version, it is recommended to use + * either -O2 -mavx2 -march=haswell or -O2 -mavx2 + * -mno-avx256-split-unaligned-load for decent performance, or to use + * Clang instead. * - * This is available on ARMv7-A, but is less efficient than a single - * VZIP.32. + * Fortunately, we can control the first one with a pragma that forces GCC + * into -O2, but the other one we can't control without "failed to inline + * always inline function due to target mismatch" warnings. */ + #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && \ + !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ + #pragma GCC push_options + #pragma GCC optimize("-O2") + #endif - /* - * Function-like macro: - * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t - * &outHi) - * { - - * outLo = (uint32x2_t)(in & 0xFFFFFFFF); - * outHi = (uint32x2_t)(in >> 32); - * in = UNDEFINED; - * } - */ - #if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \ - && defined(__GNUC__) && !defined(__aarch64__) && !defined(__arm64__) - #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ - do { \ - \ - /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, \ - * %f0 = upper D half */ \ - /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 \ - */ \ - /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 \ - */ \ - __asm__("vzip.32 %e0, %f0" : "+w"(in)); \ - (outLo) = vget_low_u32(vreinterpretq_u32_u64(in)); \ - (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \ - \ - } while (0) + #if XXH_VECTOR == XXH_NEON + /* + * NEON's setup for vmlal_u32 is a little more complicated than it is on + * SSE2, AVX2, and VSX. + * + * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an + * upcast. + * + * To do the same operation, the 128-bit 'Q' register needs to be split + * into two 64-bit 'D' registers, performing this operation:: + * + * [ a | b ] | + * '---------. .--------' | | x | + * | .---------' '--------. | + * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32 + * ] + * + * Due to significant changes in aarch64, the fastest method for aarch64 + * is completely different than the fastest method for ARMv7-A. + * + * ARMv7-A treats D registers as unions overlaying Q registers, so + * modifying D11 will modify the high half of Q5. This is similar to how + * modifying AH will only affect bits 8-15 of AX on x86. + * + * VZIP takes two registers, and puts even lanes in one register and odd + * lanes in the other. + * + * On ARMv7-A, this strangely modifies both parameters in place instead + * of taking the usual 3-operand form. + * + * Therefore, if we want to do this, we can simply use a D-form VZIP.32 + * on the lower and upper halves of the Q register to end up with the + * high and low halves where we want - all in one instruction. + * + * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = { - #else - #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ - do { \ - \ - (outLo) = vmovn_u64(in); \ - (outHi) = vshrn_n_u64((in), 32); \ - \ - } while (0) + * d10[1], d11[1] } + * + * Unfortunately we need inline assembly for this: Instructions + * modifying two registers at once is not possible in GCC or Clang's IR, + * and they have to create a copy. + * + * aarch64 requires a different approach. + * + * In order to make it easier to write a decent compiler for aarch64, + * many quirks were removed, such as conditional execution. + * + * NEON was also affected by this. + * + * aarch64 cannot access the high bits of a Q-form register, and writes + * to a D-form register zero the high bits, similar to how writes to + * W-form scalar registers (or DWORD registers on x86_64) work. + * + * The formerly free vget_high intrinsics now require a vext (with a few + * exceptions) + * + * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the + * equivalent of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to + * only modify one operand. + * + * The equivalent of the VZIP.32 on the lower and upper halves would be + * this mess: + * + * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], + * v0[1] } zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] } zip2 + * v0.2s, v0.2s, v1.2s // v0 = { v0[1], v2[1] } + * + * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 + * (SHRN): + * + * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32); + * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF); + * + * This is available on ARMv7-A, but is less efficient than a single + * VZIP.32. + */ - #endif - #endif /* XXH_VECTOR == XXH_NEON */ + /*! + * Function-like macro: + * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t + * &outHi) + * { - /* - * VSX and Z Vector helpers. - * - * This is very messy, and any pull requests to clean this up are welcome. - * - * There are a lot of problems with supporting VSX and s390x, due to - * inconsistent intrinsics, spotty coverage, and multiple endiannesses. - */ - #if XXH_VECTOR == XXH_VSX - #if defined(__s390x__) - #include - #else - /* gcc's altivec.h can have the unwanted consequence to unconditionally - * #define bool, vector, and pixel keywords, - * with bad consequences for programs already using these keywords for - * other purposes. The paragraph defining these macros is skipped when - * __APPLE_ALTIVEC__ is defined. - * __APPLE_ALTIVEC__ is _generally_ defined automatically by the - * compiler, but it seems that, in some cases, it isn't. Force the build - * macro to be defined, so that keywords are not altered. + * outLo = (uint32x2_t)(in & 0xFFFFFFFF); + * outHi = (uint32x2_t)(in >> 32); + * in = UNDEFINED; + * } */ - #if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__) - #define __APPLE_ALTIVEC__ + #if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \ + && defined(__GNUC__) && !defined(__aarch64__) && \ + !defined(__arm64__) + #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + \ + /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, \ + * %f0 = upper D half */ \ + /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 \ + */ \ + /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 \ + */ \ + __asm__("vzip.32 %e0, %f0" : "+w"(in)); \ + (outLo) = vget_low_u32(vreinterpretq_u32_u64(in)); \ + (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \ + \ + } while (0) + #else + #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + \ + (outLo) = vmovn_u64(in); \ + (outHi) = vshrn_n_u64((in), 32); \ + \ + } while (0) + #endif + #endif /* XXH_VECTOR == XXH_NEON */ + + /* + * VSX and Z Vector helpers. + * + * This is very messy, and any pull requests to clean this up are welcome. + * + * There are a lot of problems with supporting VSX and s390x, due to + * inconsistent intrinsics, spotty coverage, and multiple endiannesses. + */ + #if XXH_VECTOR == XXH_VSX + #if defined(__s390x__) + #include + #else + /* gcc's altivec.h can have the unwanted consequence to + * unconditionally #define bool, vector, and pixel keywords, with bad + * consequences for programs already using these keywords for other + * purposes. The paragraph defining these macros is skipped when + * __APPLE_ALTIVEC__ is defined. + * __APPLE_ALTIVEC__ is _generally_ defined automatically by the + * compiler, but it seems that, in some cases, it isn't. Force the + * build macro to be defined, so that keywords are not altered. + */ + #if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__) + #define __APPLE_ALTIVEC__ + #endif + #include #endif - #include - #endif typedef __vector unsigned long long xxh_u64x2; typedef __vector unsigned char xxh_u8x16; typedef __vector unsigned xxh_u32x4; - #ifndef XXH_VSX_BE - #if defined(__BIG_ENDIAN__) || \ - (defined(__BYTE_ORDER__) && \ - __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) - #define XXH_VSX_BE 1 - #elif defined(__VEC_ELEMENT_REG_ORDER__) && \ - __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ - #warning \ - "-maltivec=be is not recommended. Please use native endianness." - #define XXH_VSX_BE 1 - #else - #define XXH_VSX_BE 0 - #endif - #endif /* !defined(XXH_VSX_BE) */ - - #if XXH_VSX_BE - /* A wrapper for POWER9's vec_revb. */ - #if defined(__POWER9_VECTOR__) || \ - (defined(__clang__) && defined(__s390x__)) - #define XXH_vec_revb vec_revb - #else + #ifndef XXH_VSX_BE + #if defined(__BIG_ENDIAN__) || \ + (defined(__BYTE_ORDER__) && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #define XXH_VSX_BE 1 + #elif defined(__VEC_ELEMENT_REG_ORDER__) && \ + __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + #warning \ + "-maltivec=be is not recommended. Please use native endianness." + #define XXH_VSX_BE 1 + #else + #define XXH_VSX_BE 0 + #endif + #endif /* !defined(XXH_VSX_BE) */ + + #if XXH_VSX_BE + #if defined(__POWER9_VECTOR__) || \ + (defined(__clang__) && defined(__s390x__)) + #define XXH_vec_revb vec_revb + #else +/*! + * A polyfill for POWER9's vec_revb(). + */ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) { xxh_u8x16 const vByteSwap = {0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, @@ -2834,40 +3458,40 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) { } - #endif - #endif /* XXH_VSX_BE */ + #endif + #endif /* XXH_VSX_BE */ -/* - * Performs an unaligned load and byte swaps it on big endian. +/*! + * Performs an unaligned vector load and byte swaps it on big endian. */ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) { xxh_u64x2 ret; memcpy(&ret, ptr, sizeof(xxh_u64x2)); - #if XXH_VSX_BE + #if XXH_VSX_BE ret = XXH_vec_revb(ret); - #endif + #endif return ret; } - /* - * vec_mulo and vec_mule are very problematic intrinsics on PowerPC - * - * These intrinsics weren't added until GCC 8, despite existing for a - * while, and they are endian dependent. Also, their meaning swap - * depending on version. - * */ - #if defined(__s390x__) - /* s390x is always big endian, no issue on this platform */ - #define XXH_vec_mulo vec_mulo - #define XXH_vec_mule vec_mule - #elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) - /* Clang has a better way to control this, we can just use the builtin - * which doesn't swap. */ - #define XXH_vec_mulo __builtin_altivec_vmulouw - #define XXH_vec_mule __builtin_altivec_vmuleuw - #else + /* + * vec_mulo and vec_mule are very problematic intrinsics on PowerPC + * + * These intrinsics weren't added until GCC 8, despite existing for a + * while, and they are endian dependent. Also, their meaning swap + * depending on version. + * */ + #if defined(__s390x__) + /* s390x is always big endian, no issue on this platform */ + #define XXH_vec_mulo vec_mulo + #define XXH_vec_mule vec_mule + #elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) + /* Clang has a better way to control this, we can just use the builtin + * which doesn't swap. */ + #define XXH_vec_mulo __builtin_altivec_vmulouw + #define XXH_vec_mule __builtin_altivec_vmuleuw + #else /* gcc needs inline assembly */ /* Adapted from * https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */ @@ -2887,40 +3511,41 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) { } - #endif /* XXH_vec_mulo, XXH_vec_mule */ - #endif /* XXH_VECTOR == XXH_VSX */ + #endif /* XXH_vec_mulo, XXH_vec_mule */ + #endif /* XXH_VECTOR == XXH_VSX */ - /* prefetch - * can be disabled, by declaring XXH_NO_PREFETCH build macro */ - #if defined(XXH_NO_PREFETCH) - #define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ - #else - #if defined(_MSC_VER) && \ - (defined(_M_X64) || \ - defined( \ - _M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ - #include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ - #define XXH_PREFETCH(ptr) _mm_prefetch((const char *)(ptr), _MM_HINT_T0) - #elif defined(__GNUC__) && \ - ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))) - #define XXH_PREFETCH(ptr) \ - __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) - #else + /* prefetch + * can be disabled, by declaring XXH_NO_PREFETCH build macro */ + #if defined(XXH_NO_PREFETCH) #define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ - #endif - #endif /* XXH_NO_PREFETCH */ + #else + #if defined(_MSC_VER) && \ + (defined(_M_X64) || \ + defined( \ + _M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */ + #include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ + #define XXH_PREFETCH(ptr) \ + _mm_prefetch((const char *)(ptr), _MM_HINT_T0) + #elif defined(__GNUC__) && \ + ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))) + #define XXH_PREFETCH(ptr) \ + __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) + #else + #define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ + #endif + #endif /* XXH_NO_PREFETCH */ - /* ========================================== - * XXH3 default settings - * ========================================== */ + /* ========================================== + * XXH3 default settings + * ========================================== */ - #define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */ + #define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */ - #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN) - #error "default keyset is not large enough" - #endif + #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN) + #error "default keyset is not large enough" + #endif -/* Pseudorandom secret taken directly from FARSH */ +/*! Pseudorandom secret taken directly from FARSH. */ XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { @@ -2943,69 +3568,79 @@ static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { }; - #ifdef XXH_OLD_NAMES - #define kSecret XXH3_kSecret - #endif + #ifdef XXH_OLD_NAMES + #define kSecret XXH3_kSecret + #endif - /* - * Calculates a 32-bit to 64-bit long multiply. - * - * Wraps __emulu on MSVC x86 because it tends to call __allmul when it - * doesn't need to (but it shouldn't need to anyways, it is about 7 - * instructions to do a 64x64 multiply...). Since we know that this will - * _always_ emit MULL, we use that instead of the normal method. - * - * If you are compiling for platforms like Thumb-1 and don't have a better - * option, you may also want to write your own long multiply routine here. - * - * XXH_FORCE_INLINE xxh_u64 XXH_mult32to64(xxh_u64 x, xxh_u64 y) - * { + #ifdef XXH_DOXYGEN +/*! + * @brief Calculates a 32-bit to 64-bit long multiply. + * + * Implemented as a macro. + * + * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it + * doesn't need to (but it shouldn't need to anyways, it is about 7 instructions + * to do a 64x64 multiply...). Since we know that this will _always_ emit + * `MULL`, we use that instead of the normal method. + * + * If you are compiling for platforms like Thumb-1 and don't have a better + * option, you may also want to write your own long multiply routine here. + * + * @param x, y Numbers to be multiplied + * @return 64-bit product of the low 32 bits of @p x and @p y. + */ +XXH_FORCE_INLINE xxh_u64 XXH_mult32to64(xxh_u64 x, xxh_u64 y) { - * return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); - * } - */ - #if defined(_MSC_VER) && defined(_M_IX86) - #include - #define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) - #else - /* - * Downcast + upcast is usually better than masking on older compilers - * like GCC 4.2 (especially 32-bit ones), all without affecting newer - * compilers. - * - * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both - * operands and perform a full 64x64 multiply -- entirely redundant on - * 32-bit. - */ - #define XXH_mult32to64(x, y) \ - ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) - #endif + return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); -/* - * Calculates a 64->128-bit long multiply. +} + + #elif defined(_MSC_VER) && defined(_M_IX86) + #include + #define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) + #else + /* + * Downcast + upcast is usually better than masking on older compilers + * like GCC 4.2 (especially 32-bit ones), all without affecting newer + * compilers. + * + * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both + * operands and perform a full 64x64 multiply -- entirely redundant on + * 32-bit. + */ + #define XXH_mult32to64(x, y) \ + ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) + #endif + +/*! + * @brief Calculates a 64->128-bit long multiply. + * + * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar + * version. * - * Uses __uint128_t and _umul128 if available, otherwise uses a scalar version. + * @param lhs, rhs The 64-bit integers to be multiplied + * @return The 128-bit result represented in an @ref XXH128_hash_t. */ static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) { - /* - * GCC/Clang __uint128_t method. - * - * On most 64-bit targets, GCC and Clang define a __uint128_t type. - * This is usually the best way as it usually uses a native long 64-bit - * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. - * - * Usually. - * - * Despite being a 32-bit platform, Clang (and emscripten) define this type - * despite not having the arithmetic for it. This results in a laggy - * compiler builtin call which calculates a full 128-bit multiply. - * In that case it is best to use the portable one. - * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 - */ - #if defined(__GNUC__) && !defined(__wasm__) && \ - defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. + * This is usually the best way as it usually uses a native long 64-bit + * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. + * + * Usually. + * + * Despite being a 32-bit platform, Clang (and emscripten) define this + * type despite not having the arithmetic for it. This results in a laggy + * compiler builtin call which calculates a full 128-bit multiply. + * In that case it is best to use the portable one. + * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 + */ + #if defined(__GNUC__) && !defined(__wasm__) && \ + defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs; XXH128_hash_t r128; @@ -3013,19 +3648,19 @@ static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) { r128.high64 = (xxh_u64)(product >> 64); return r128; - /* - * MSVC for x64's _umul128 method. - * - * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 - * *HighProduct); - * - * This compiles to single operand MUL on x64. - */ - #elif defined(_M_X64) || defined(_M_IA64) + /* + * MSVC for x64's _umul128 method. + * + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 + * *HighProduct); + * + * This compiles to single operand MUL on x64. + */ + #elif defined(_M_X64) || defined(_M_IA64) - #ifndef _MSC_VER - #pragma intrinsic(_umul128) - #endif + #ifndef _MSC_VER + #pragma intrinsic(_umul128) + #endif xxh_u64 product_high; xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); XXH128_hash_t r128; @@ -3033,7 +3668,7 @@ static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) { r128.high64 = product_high; return r128; - #else + #else /* * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. * @@ -3093,16 +3728,20 @@ static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) { r128.low64 = lower; r128.high64 = upper; return r128; - #endif + #endif } -/* - * Does a 64-bit to 128-bit multiply, then XOR folds it. +/*! + * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it. * * The reason for the separate function is to prevent passing too many structs * around by value. This will hopefully inline the multiply, but we don't force * it. + * + * @param lhs, rhs The 64-bit integers to multiply + * @return The low 64 bits of the product XOR'd by the high 64 bits. + * @see XXH_mult64to128() */ static xxh_u64 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) { @@ -3111,7 +3750,7 @@ static xxh_u64 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) { } -/* Seems to produce slightly better code on GCC for some reason. */ +/*! Seems to produce slightly better code on GCC for some reason. */ XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) { XXH_ASSERT(0 <= shift && shift < 64); @@ -3216,7 +3855,7 @@ XXH_FORCE_INLINE XXH64_hash_t XXH3_len_4to8_64b(const xxh_u8 *input, size_t len, XXH_ASSERT(input != NULL); XXH_ASSERT(secret != NULL); - XXH_ASSERT(4 <= len && len < 8); + XXH_ASSERT(4 <= len && len <= 8); seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; { @@ -3239,7 +3878,7 @@ XXH_FORCE_INLINE XXH64_hash_t XXH3_len_9to16_64b(const xxh_u8 *input, XXH_ASSERT(input != NULL); XXH_ASSERT(secret != NULL); - XXH_ASSERT(8 <= len && len <= 16); + XXH_ASSERT(9 <= len && len <= 16); { xxh_u64 const bitflip1 = @@ -3306,11 +3945,10 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8 *XXH_RESTRICT input, const xxh_u8 *XXH_RESTRICT secret, xxh_u64 seed64) { - #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ - && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ - && \ - !defined( \ - XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */ + #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like \ + XXH32 hack */ /* * UGLY HACK: * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in @@ -3326,8 +3964,8 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8 *XXH_RESTRICT input, * GCC generates much better scalar code than Clang for the rest of XXH3, * which is why finding a more optimal codepath is an interest. */ - __asm__("" : "+r"(seed64)); - #endif + XXH_COMPILER_GUARD(seed64); + #endif { xxh_u64 const input_lo = XXH_readLE64(input); @@ -3381,7 +4019,7 @@ XXH_FORCE_INLINE XXH64_hash_t XXH3_len_17to128_64b( } - #define XXH3_MIDSIZE_MAX 240 + #define XXH3_MIDSIZE_MAX 240 XXH_NO_INLINE XXH64_hash_t XXH3_len_129to240_64b( const xxh_u8 *XXH_RESTRICT input, size_t len, @@ -3391,8 +4029,8 @@ XXH_NO_INLINE XXH64_hash_t XXH3_len_129to240_64b( (void)secretSize; XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); - #define XXH3_MIDSIZE_STARTOFFSET 3 - #define XXH3_MIDSIZE_LASTOFFSET 17 + #define XXH3_MIDSIZE_STARTOFFSET 3 + #define XXH3_MIDSIZE_LASTOFFSET 17 { @@ -3407,31 +4045,31 @@ XXH_NO_INLINE XXH64_hash_t XXH3_len_129to240_64b( acc = XXH3_avalanche(acc); XXH_ASSERT(nbRounds >= 8); - #if defined(__clang__) /* Clang */ \ - && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ - && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ - /* - * UGLY HACK: - * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86. - * In everywhere else, it uses scalar code. - * - * For 64->128-bit multiplies, even if the NEON was 100% optimal, it - * would still be slower than UMAAL (see XXH_mult64to128). - * - * Unfortunately, Clang doesn't handle the long multiplies properly and - * converts them to the nonexistent "vmulq_u64" intrinsic, which is then - * scalarized into an ugly mess of VMOV.32 instructions. - * - * This mess is difficult to avoid without turning autovectorization - * off completely, but they are usually relatively minor and/or not - * worth it to fix. - * - * This loop is the easiest to fix, as unlike XXH32, this pragma - * _actually works_ because it is a loop vectorization instead of an - * SLP vectorization. - */ - #pragma clang loop vectorize(disable) - #endif + #if defined(__clang__) /* Clang */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + /* + * UGLY HACK: + * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86. + * In everywhere else, it uses scalar code. + * + * For 64->128-bit multiplies, even if the NEON was 100% optimal, it + * would still be slower than UMAAL (see XXH_mult64to128). + * + * Unfortunately, Clang doesn't handle the long multiplies properly and + * converts them to the nonexistent "vmulq_u64" intrinsic, which is then + * scalarized into an ugly mess of VMOV.32 instructions. + * + * This mess is difficult to avoid without turning autovectorization + * off completely, but they are usually relatively minor and/or not + * worth it to fix. + * + * This loop is the easiest to fix, as unlike XXH32, this pragma + * _actually works_ because it is a loop vectorization instead of an + * SLP vectorization. + */ + #pragma clang loop vectorize(disable) + #endif for (i = 8; i < nbRounds; i++) { acc += @@ -3450,17 +4088,17 @@ XXH_NO_INLINE XXH64_hash_t XXH3_len_129to240_64b( } - /* ======= Long Keys ======= */ + /* ======= Long Keys ======= */ - #define XXH_STRIPE_LEN 64 - #define XXH_SECRET_CONSUME_RATE \ - 8 /* nb of secret bytes consumed at each accumulation */ - #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64)) + #define XXH_STRIPE_LEN 64 + #define XXH_SECRET_CONSUME_RATE \ + 8 /* nb of secret bytes consumed at each accumulation */ + #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64)) - #ifdef XXH_OLD_NAMES - #define STRIPE_LEN XXH_STRIPE_LEN - #define ACC_NB XXH_ACC_NB - #endif + #ifdef XXH_OLD_NAMES + #define STRIPE_LEN XXH_STRIPE_LEN + #define ACC_NB XXH_ACC_NB + #endif XXH_FORCE_INLINE void XXH_writeLE64(void *dst, xxh_u64 v64) { @@ -3469,56 +4107,58 @@ XXH_FORCE_INLINE void XXH_writeLE64(void *dst, xxh_u64 v64) { } - /* Several intrinsic functions below are supposed to accept __int64 as - * argument, as documented in - * https://software.intel.com/sites/landingpage/IntrinsicsGuide/ . However, - * several environments do not define __int64 type, requiring a workaround. - */ - #if !defined(__VMS) && \ - (defined(__cplusplus) || (defined(__STDC_VERSION__) && \ - (__STDC_VERSION__ >= 199901L) /* C99 */)) + /* Several intrinsic functions below are supposed to accept __int64 as + * argument, as documented in + * https://software.intel.com/sites/landingpage/IntrinsicsGuide/ . + * However, several environments do not define __int64 type, + * requiring a workaround. + */ + #if !defined(__VMS) && \ + (defined(__cplusplus) || (defined(__STDC_VERSION__) && \ + (__STDC_VERSION__ >= 199901L) /* C99 */)) typedef int64_t xxh_i64; - #else + #else /* the following type must have a width of 64-bit */ typedef long long xxh_i64; - #endif + #endif - /* - * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the - * most optimized. - * - * It is a hardened version of UMAC, based off of FARSH's implementation. - * - * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD - * implementations, and it is ridiculously fast. - * - * We harden it by mixing the original input to the accumulators as well as - * the product. - * - * This means that in the (relatively likely) case of a multiply by zero, the - * original input is preserved. - * - * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve - * cross-pollination, as otherwise the upper and lower halves would be - * essentially independent. - * - * This doesn't matter on 64-bit hashes since they all get merged together in - * the end, so we skip the extra step. - * - * Both XXH3_64bits and XXH3_128bits use this subroutine. - */ + /* + * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the + * most optimized. + * + * It is a hardened version of UMAC, based off of FARSH's implementation. + * + * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD + * implementations, and it is ridiculously fast. + * + * We harden it by mixing the original input to the accumulators as well as + * the product. + * + * This means that in the (relatively likely) case of a multiply by zero, + * the original input is preserved. + * + * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve + * cross-pollination, as otherwise the upper and lower halves would be + * essentially independent. + * + * This doesn't matter on 64-bit hashes since they all get merged together + * in the end, so we skip the extra step. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ - #if (XXH_VECTOR == XXH_AVX512) || defined(XXH_X86DISPATCH) + #if (XXH_VECTOR == XXH_AVX512) || \ + (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0) - #ifndef XXH_TARGET_AVX512 - #define XXH_TARGET_AVX512 /* disable attribute target */ - #endif + #ifndef XXH_TARGET_AVX512 + #define XXH_TARGET_AVX512 /* disable attribute target */ + #endif XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_accumulate_512_avx512( void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { - XXH_ALIGN(64) __m512i *const xacc = (__m512i *)acc; + __m512i *const xacc = (__m512i *)acc; XXH_ASSERT((((size_t)acc) & 63) == 0); XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); @@ -3576,8 +4216,8 @@ XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_scrambleAcc_avx512( XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); { - XXH_ALIGN(64) __m512i *const xacc = (__m512i *)acc; - const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); + __m512i *const xacc = (__m512i *)acc; + const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); /* xacc[0] ^= (xacc[0] >> 47) */ __m512i const acc_vec = *xacc; @@ -3609,19 +4249,21 @@ XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_initCustomSecret_avx512( int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); __m512i const seed = _mm512_mask_set1_epi64( - _mm512_set1_epi64((xxh_i64)seed64), 0xAA, -(xxh_i64)seed64); + _mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64)); - XXH_ALIGN(64) const __m512i *const src = (const __m512i *)XXH3_kSecret; - XXH_ALIGN(64) __m512i *const dest = (__m512i *)customSecret; - int i; + const __m512i *const src = (const __m512i *)((const void *)XXH3_kSecret); + __m512i *const dest = (__m512i *)customSecret; + int i; + XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dest & 63) == 0); for (i = 0; i < nbRounds; ++i) { /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void - * const*', this will warn "discards ‘const’ qualifier". */ + * const*', this will warn "discards 'const' qualifier". */ union { - XXH_ALIGN(64) const __m512i *cp; - XXH_ALIGN(64) void *p; + const __m512i *cp; + void * p; } remote_const_void; @@ -3635,13 +4277,14 @@ XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_initCustomSecret_avx512( } - #endif + #endif - #if (XXH_VECTOR == XXH_AVX2) || defined(XXH_X86DISPATCH) + #if (XXH_VECTOR == XXH_AVX2) || \ + (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0) - #ifndef XXH_TARGET_AVX2 - #define XXH_TARGET_AVX2 /* disable attribute target */ - #endif + #ifndef XXH_TARGET_AVX2 + #define XXH_TARGET_AVX2 /* disable attribute target */ + #endif XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_accumulate_512_avx2( void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, @@ -3650,7 +4293,7 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_accumulate_512_avx2( XXH_ASSERT((((size_t)acc) & 31) == 0); { - XXH_ALIGN(32) __m256i *const xacc = (__m256i *)acc; + __m256i *const xacc = (__m256i *)acc; /* Unaligned. This is mainly for pointer arithmetic, and because * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ @@ -3692,7 +4335,7 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_scrambleAcc_avx2( XXH_ASSERT((((size_t)acc) & 31) == 0); { - XXH_ALIGN(32) __m256i *const xacc = (__m256i *)acc; + __m256i *const xacc = (__m256i *)acc; /* Unaligned. This is mainly for pointer arithmetic, and because * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ const __m256i *const xsecret = (const __m256i *)secret; @@ -3732,24 +4375,23 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2( XXH_PREFETCH(customSecret); { - __m256i const seed = _mm256_set_epi64x(-(xxh_i64)seed64, (xxh_i64)seed64, - -(xxh_i64)seed64, (xxh_i64)seed64); + __m256i const seed = + _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, + (xxh_i64)(0U - seed64), (xxh_i64)seed64); - XXH_ALIGN(64) const __m256i *const src = (const __m256i *)XXH3_kSecret; - XXH_ALIGN(64) __m256i * dest = (__m256i *)customSecret; + const __m256i *const src = (const __m256i *)((const void *)XXH3_kSecret); + __m256i * dest = (__m256i *)customSecret; - #if defined(__GNUC__) || defined(__clang__) + #if defined(__GNUC__) || defined(__clang__) /* * On GCC & Clang, marking 'dest' as modified will cause the compiler: * - do not extract the secret from sse registers in the internal loop * - use less common registers, and avoid pushing these reg into stack - * The asm hack causes Clang to assume that XXH3_kSecretPtr aliases with - * customSecret, and on aarch64, this prevented LDP from merging two - * loads together for free. Putting the loads together before the stores - * properly generates LDP. */ - __asm__("" : "+r"(dest)); - #endif + XXH_COMPILER_GUARD(dest); + #endif + XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dest & 31) == 0); /* GCC -O2 need unroll loop manually */ dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src + 0), seed); @@ -3763,13 +4405,14 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2( } - #endif + #endif - #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) + /* x86dispatch always generates SSE2 */ + #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) - #ifndef XXH_TARGET_SSE2 - #define XXH_TARGET_SSE2 /* disable attribute target */ - #endif + #ifndef XXH_TARGET_SSE2 + #define XXH_TARGET_SSE2 /* disable attribute target */ + #endif XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_accumulate_512_sse2( void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, @@ -3779,7 +4422,7 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_accumulate_512_sse2( XXH_ASSERT((((size_t)acc) & 15) == 0); { - XXH_ALIGN(16) __m128i *const xacc = (__m128i *)acc; + __m128i *const xacc = (__m128i *)acc; /* Unaligned. This is mainly for pointer arithmetic, and because * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ const __m128i *const xinput = (const __m128i *)input; @@ -3820,7 +4463,7 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_scrambleAcc_sse2( XXH_ASSERT((((size_t)acc) & 15) == 0); { - XXH_ALIGN(16) __m128i *const xacc = (__m128i *)acc; + __m128i *const xacc = (__m128i *)acc; /* Unaligned. This is mainly for pointer arithmetic, and because * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ const __m128i *const xsecret = (const __m128i *)secret; @@ -3859,30 +4502,34 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2( int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); - #if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 - // MSVC 32bit mode does not support _mm_set_epi64x before 2015 + #if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 + /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */ XXH_ALIGN(16) - const xxh_i64 seed64x2[2] = {(xxh_i64)seed64, -(xxh_i64)seed64}; + const xxh_i64 seed64x2[2] = {(xxh_i64)seed64, (xxh_i64)(0U - seed64)}; __m128i const seed = _mm_load_si128((__m128i const *)seed64x2); - #else - __m128i const seed = _mm_set_epi64x(-(xxh_i64)seed64, (xxh_i64)seed64); - #endif + #else + __m128i const seed = + _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64); + #endif int i; - XXH_ALIGN(64) const float *const src = (float const *)XXH3_kSecret; - XXH_ALIGN(XXH_SEC_ALIGN) __m128i *dest = (__m128i *)customSecret; - #if defined(__GNUC__) || defined(__clang__) + const void *const src16 = XXH3_kSecret; + __m128i * dst16 = (__m128i *)customSecret; + #if defined(__GNUC__) || defined(__clang__) /* * On GCC & Clang, marking 'dest' as modified will cause the compiler: * - do not extract the secret from sse registers in the internal loop * - use less common registers, and avoid pushing these reg into stack */ - __asm__("" : "+r"(dest)); - #endif + XXH_COMPILER_GUARD(dst16); + #endif + XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dst16 & 15) == 0); for (i = 0; i < nbRounds; ++i) { - dest[i] = _mm_add_epi64(_mm_castps_si128(_mm_load_ps(src + i * 4)), seed); + dst16[i] = + _mm_add_epi64(_mm_load_si128((const __m128i *)src16 + i), seed); } @@ -3890,9 +4537,9 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2( } - #endif + #endif - #if (XXH_VECTOR == XXH_NEON) + #if (XXH_VECTOR == XXH_NEON) XXH_FORCE_INLINE void XXH3_accumulate_512_neon( void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, @@ -3901,7 +4548,7 @@ XXH_FORCE_INLINE void XXH3_accumulate_512_neon( XXH_ASSERT((((size_t)acc) & 15) == 0); { - XXH_ALIGN(16) uint64x2_t *const xacc = (uint64x2_t *)acc; + uint64x2_t *const xacc = (uint64x2_t *)acc; /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ uint8_t const *const xinput = (const uint8_t *)input; @@ -3996,9 +4643,9 @@ XXH_FORCE_INLINE void XXH3_scrambleAcc_neon(void *XXH_RESTRICT acc, } - #endif + #endif - #if (XXH_VECTOR == XXH_VSX) + #if (XXH_VECTOR == XXH_VSX) XXH_FORCE_INLINE void XXH3_accumulate_512_vsx(void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, @@ -4025,12 +4672,12 @@ XXH_FORCE_INLINE void XXH3_accumulate_512_vsx(void *XXH_RESTRICT acc, xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); xacc[i] += product; - /* swap high and low halves */ - #ifdef __s390x__ + /* swap high and low halves */ + #ifdef __s390x__ xacc[i] += vec_permi(data_vec, data_vec, 2); - #else + #else xacc[i] += vec_xxpermdi(data_vec, data_vec, 2); - #endif + #endif } @@ -4075,7 +4722,7 @@ XXH_FORCE_INLINE void XXH3_scrambleAcc_vsx(void *XXH_RESTRICT acc, } - #endif + #endif /* scalar variants - universal */ @@ -4083,7 +4730,6 @@ XXH_FORCE_INLINE void XXH3_accumulate_512_scalar( void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { - XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 *const xacc = (xxh_u64 *)acc; /* presumed aligned */ const xxh_u8 *const xinput = (const xxh_u8 *)input; /* no alignment restriction */ @@ -4105,7 +4751,6 @@ XXH_FORCE_INLINE void XXH3_accumulate_512_scalar( XXH_FORCE_INLINE void XXH3_scrambleAcc_scalar(void *XXH_RESTRICT acc, const void *XXH_RESTRICT secret) { - XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 *const xacc = (xxh_u64 *)acc; /* presumed aligned */ const xxh_u8 *const xsecret = (const xxh_u8 *)secret; /* no alignment restriction */ @@ -4135,7 +4780,7 @@ XXH_FORCE_INLINE void XXH3_initCustomSecret_scalar( const xxh_u8 *kSecretPtr = XXH3_kSecret; XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); - #if defined(__clang__) && defined(__aarch64__) + #if defined(__clang__) && defined(__aarch64__) /* * UGLY HACK: * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are @@ -4164,8 +4809,8 @@ XXH_FORCE_INLINE void XXH3_initCustomSecret_scalar( * without hack: 2654.4 MB/s * with hack: 3202.9 MB/s */ - __asm__("" : "+r"(kSecretPtr)); - #endif + XXH_COMPILER_GUARD(kSecretPtr); + #endif /* * Note: in debug mode, this overrides the asm optimization * and Clang will emit MOVK chains again. @@ -4200,55 +4845,55 @@ typedef void (*XXH3_f_accumulate_512)(void *XXH_RESTRICT, const void *, typedef void (*XXH3_f_scrambleAcc)(void *XXH_RESTRICT, const void *); typedef void (*XXH3_f_initCustomSecret)(void *XXH_RESTRICT, xxh_u64); - #if (XXH_VECTOR == XXH_AVX512) + #if (XXH_VECTOR == XXH_AVX512) - #define XXH3_accumulate_512 XXH3_accumulate_512_avx512 - #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 - #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 + #define XXH3_accumulate_512 XXH3_accumulate_512_avx512 + #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 + #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 - #elif (XXH_VECTOR == XXH_AVX2) + #elif (XXH_VECTOR == XXH_AVX2) - #define XXH3_accumulate_512 XXH3_accumulate_512_avx2 - #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 - #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 + #define XXH3_accumulate_512 XXH3_accumulate_512_avx2 + #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 + #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 - #elif (XXH_VECTOR == XXH_SSE2) + #elif (XXH_VECTOR == XXH_SSE2) - #define XXH3_accumulate_512 XXH3_accumulate_512_sse2 - #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 - #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 + #define XXH3_accumulate_512 XXH3_accumulate_512_sse2 + #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 + #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 - #elif (XXH_VECTOR == XXH_NEON) + #elif (XXH_VECTOR == XXH_NEON) - #define XXH3_accumulate_512 XXH3_accumulate_512_neon - #define XXH3_scrambleAcc XXH3_scrambleAcc_neon - #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + #define XXH3_accumulate_512 XXH3_accumulate_512_neon + #define XXH3_scrambleAcc XXH3_scrambleAcc_neon + #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - #elif (XXH_VECTOR == XXH_VSX) + #elif (XXH_VECTOR == XXH_VSX) - #define XXH3_accumulate_512 XXH3_accumulate_512_vsx - #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx - #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + #define XXH3_accumulate_512 XXH3_accumulate_512_vsx + #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx + #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - #else /* scalar */ + #else /* scalar */ - #define XXH3_accumulate_512 XXH3_accumulate_512_scalar - #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar - #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + #define XXH3_accumulate_512 XXH3_accumulate_512_scalar + #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar + #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - #endif + #endif - #ifndef XXH_PREFETCH_DIST - #ifdef __clang__ - #define XXH_PREFETCH_DIST 320 - #else - #if (XXH_VECTOR == XXH_AVX512) - #define XXH_PREFETCH_DIST 512 + #ifndef XXH_PREFETCH_DIST + #ifdef __clang__ + #define XXH_PREFETCH_DIST 320 #else - #define XXH_PREFETCH_DIST 384 - #endif - #endif /* __clang__ */ - #endif /* XXH_PREFETCH_DIST */ + #if (XXH_VECTOR == XXH_AVX512) + #define XXH_PREFETCH_DIST 512 + #else + #define XXH_PREFETCH_DIST 384 + #endif + #endif /* __clang__ */ + #endif /* XXH_PREFETCH_DIST */ /* * XXH3_accumulate() @@ -4308,8 +4953,9 @@ XXH_FORCE_INLINE void XXH3_hashLong_internal_loop( { const xxh_u8 *const p = input + len - XXH_STRIPE_LEN; - #define XXH_SECRET_LASTACC_START \ - 7 /* not aligned on 8, last secret is different from acc & scrambler */ + #define XXH_SECRET_LASTACC_START \ + 7 /* not aligned on 8, last secret is different from acc & scrambler \ + */ f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); @@ -4337,10 +4983,10 @@ static XXH64_hash_t XXH3_mergeAccs(const xxh_u64 *XXH_RESTRICT acc, for (i = 0; i < 4; i++) { result64 += XXH3_mix2Accs(acc + 2 * i, secret + 16 * i); - #if defined(__clang__) /* Clang */ \ - && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ - && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ - && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + #if defined(__clang__) /* Clang */ \ + && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ /* * UGLY HACK: * Prevent autovectorization on Clang ARMv7-a. Exact same problem as @@ -4349,8 +4995,8 @@ static XXH64_hash_t XXH3_mergeAccs(const xxh_u64 *XXH_RESTRICT acc, * without hack: 2063.7 MB/s * with hack: 2560.7 MB/s */ - __asm__("" : "+r"(result64)); - #endif + XXH_COMPILER_GUARD(result64); + #endif } @@ -4358,13 +5004,13 @@ static XXH64_hash_t XXH3_mergeAccs(const xxh_u64 *XXH_RESTRICT acc, } - #define XXH3_INIT_ACC \ - { \ - \ - XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \ - XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 \ - \ - } + #define XXH3_INIT_ACC \ + { \ + \ + XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \ + XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 \ + \ + } XXH_FORCE_INLINE XXH64_hash_t XXH3_hashLong_64b_internal( const void *XXH_RESTRICT input, size_t len, const void *XXH_RESTRICT secret, @@ -4379,9 +5025,9 @@ XXH_FORCE_INLINE XXH64_hash_t XXH3_hashLong_64b_internal( /* converge into final hash */ XXH_STATIC_ASSERT(sizeof(acc) == 64); - /* do not align on 8, so that the secret is different from the accumulator - */ - #define XXH_SECRET_MERGEACCS_START 11 + /* do not align on 8, so that the secret is different from the accumulator + */ + #define XXH_SECRET_MERGEACCS_START 11 XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); return XXH3_mergeAccs(acc, (const xxh_u8 *)secret + XXH_SECRET_MERGEACCS_START, @@ -4501,6 +5147,7 @@ XXH3_64bits_internal(const void *XXH_RESTRICT input, size_t len, /* === Public entry point === */ +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void *input, size_t len) { return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), @@ -4508,6 +5155,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void *input, size_t len) { } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void *input, size_t len, const void *secret, @@ -4518,6 +5166,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void *input, } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void *input, size_t len, XXH64_hash_t seed) { @@ -4603,6 +5252,7 @@ static void XXH_alignedFree(void *p) { } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH3_state_t *XXH3_createState(void) { XXH3_state_t *const state = @@ -4613,6 +5263,7 @@ XXH_PUBLIC_API XXH3_state_t *XXH3_createState(void) { } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t *statePtr) { XXH_alignedFree(statePtr); @@ -4620,6 +5271,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t *statePtr) { } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t * dst_state, const XXH3_state_t *src_state) { @@ -4627,9 +5279,8 @@ XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t * dst_state, } -static void XXH3_64bits_reset_internal(XXH3_state_t *statePtr, - XXH64_hash_t seed, const void *secret, - size_t secretSize) { +static void XXH3_reset_internal(XXH3_state_t *statePtr, XXH64_hash_t seed, + const void *secret, size_t secretSize) { size_t const initStart = offsetof(XXH3_state_t, bufferedSize); size_t const initLength = @@ -4654,26 +5305,28 @@ static void XXH3_64bits_reset_internal(XXH3_state_t *statePtr, } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t *statePtr) { if (statePtr == NULL) return XXH_ERROR; - XXH3_64bits_reset_internal(statePtr, 0, XXH3_kSecret, - XXH_SECRET_DEFAULT_SIZE); + XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); return XXH_OK; } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret( XXH3_state_t *statePtr, const void *secret, size_t secretSize) { if (statePtr == NULL) return XXH_ERROR; - XXH3_64bits_reset_internal(statePtr, 0, secret, secretSize); + XXH3_reset_internal(statePtr, 0, secret, secretSize); if (secret == NULL) return XXH_ERROR; if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; return XXH_OK; } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t *statePtr, XXH64_hash_t seed) { @@ -4681,7 +5334,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t *statePtr, if (seed == 0) return XXH3_64bits_reset(statePtr); if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed); - XXH3_64bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); return XXH_OK; } @@ -4733,12 +5386,12 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state, XXH3_f_scrambleAcc f_scramble) { if (input == NULL) - #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \ - (XXH_ACCEPT_NULL_INPUT_POINTER >= 1) + #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \ + (XXH_ACCEPT_NULL_INPUT_POINTER >= 1) return XXH_OK; - #else + #else return XXH_ERROR; - #endif + #endif { @@ -4747,6 +5400,7 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state, (state->extSecret == NULL) ? state->customSecret : state->extSecret; state->totalLen += len; + XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { /* fill in tmp buffer */ @@ -4756,10 +5410,10 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state, } - /* total input is now > XXH3_INTERNALBUFFER_SIZE */ + /* total input is now > XXH3_INTERNALBUFFER_SIZE */ - #define XXH3_INTERNALBUFFER_STRIPES \ - (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) + #define XXH3_INTERNALBUFFER_STRIPES \ + (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ @@ -4783,7 +5437,7 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state, XXH_ASSERT(input < bEnd); /* Consume input by a multiple of internal buffer size */ - if (input + XXH3_INTERNALBUFFER_SIZE < bEnd) { + if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) { const xxh_u8 *const limit = bEnd - XXH3_INTERNALBUFFER_SIZE; do { @@ -4814,6 +5468,7 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state, } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH3_state_t *state, const void *input, size_t len) { @@ -4859,6 +5514,7 @@ XXH_FORCE_INLINE void XXH3_digest_long(XXH64_hash_t * acc, } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(const XXH3_state_t *state) { const unsigned char *const secret = @@ -4881,8 +5537,9 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(const XXH3_state_t *state) { } - #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x)) + #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x)) +/*! @ingroup xxh3_family */ XXH_PUBLIC_API void XXH3_generateSecret(void * secretBuffer, const void *customSeed, size_t customSeedSize) { @@ -5398,6 +6055,7 @@ XXH3_128bits_internal(const void *input, size_t len, XXH64_hash_t seed64, /* === Public XXH128 API === */ +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void *input, size_t len) { return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, @@ -5406,6 +6064,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void *input, size_t len) { } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void *input, size_t len, const void *secret, @@ -5416,6 +6075,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void *input, } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void * input, size_t len, XXH64_hash_t seed) { @@ -5426,6 +6086,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void * input, } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *input, size_t len, XXH64_hash_t seed) { @@ -5437,37 +6098,31 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *input, size_t len, /* * All the functions are actually the same as for 64-bit streaming variant. - * The only difference is the finalizatiom routine. + * The only difference is the finalization routine. */ -static void XXH3_128bits_reset_internal(XXH3_state_t *statePtr, - XXH64_hash_t seed, const void *secret, - size_t secretSize) { - - XXH3_64bits_reset_internal(statePtr, seed, secret, secretSize); - -} - +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t *statePtr) { if (statePtr == NULL) return XXH_ERROR; - XXH3_128bits_reset_internal(statePtr, 0, XXH3_kSecret, - XXH_SECRET_DEFAULT_SIZE); + XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); return XXH_OK; } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret( XXH3_state_t *statePtr, const void *secret, size_t secretSize) { if (statePtr == NULL) return XXH_ERROR; - XXH3_128bits_reset_internal(statePtr, 0, secret, secretSize); + XXH3_reset_internal(statePtr, 0, secret, secretSize); if (secret == NULL) return XXH_ERROR; if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; return XXH_OK; } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t *statePtr, XXH64_hash_t seed) { @@ -5475,11 +6130,12 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t *statePtr, if (seed == 0) return XXH3_128bits_reset(statePtr); if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed); - XXH3_128bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); return XXH_OK; } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH3_state_t *state, const void * input, size_t len) { @@ -5489,6 +6145,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH3_state_t *state, } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(const XXH3_state_t *state) { const unsigned char *const secret = @@ -5524,11 +6181,12 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(const XXH3_state_t *state) { } - /* 128-bit utility functions */ + /* 128-bit utility functions */ - #include /* memcmp, memcpy */ + #include /* memcmp, memcpy */ /* return : 1 is equal, 0 if different */ +/*! @ingroup xxh3_family */ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) { /* note : XXH128_hash_t is compact, it has no padding byte */ @@ -5540,6 +6198,7 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) { * return : >0 if *h128_1 > *h128_2 * <0 if *h128_1 < *h128_2 * =0 if *h128_1 == *h128_2 */ +/*! @ingroup xxh3_family */ XXH_PUBLIC_API int XXH128_cmp(const void *h128_1, const void *h128_2) { XXH128_hash_t const h1 = *(const XXH128_hash_t *)h128_1; @@ -5552,6 +6211,7 @@ XXH_PUBLIC_API int XXH128_cmp(const void *h128_1, const void *h128_2) { } /*====== Canonical representation ======*/ +/*! @ingroup xxh3_family */ XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t *dst, XXH128_hash_t hash) { @@ -5568,6 +6228,7 @@ XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t *dst, } +/*! @ingroup xxh3_family */ XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t *src) { @@ -5578,16 +6239,21 @@ XXH128_hashFromCanonical(const XXH128_canonical_t *src) { } - /* Pop our optimization override from above */ - #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ - && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ - && defined(__OPTIMIZE__) && \ - !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ - #pragma GCC pop_options - #endif + /* Pop our optimization override from above */ + #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && \ + !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */ + #pragma GCC pop_options + #endif - #endif /* XXH_NO_LONG_LONG */ + #endif /* XXH_NO_LONG_LONG */ + + #endif /* XXH_NO_XXH3 */ +/*! + * @} + */ #endif /* XXH_IMPLEMENTATION */ #if defined(__cplusplus) diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 65bc16bc..06268d18 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -158,7 +158,7 @@ static void at_exit(int signal) { } -#define default_hash(a, b) XXH64(a, b, HASH_CONST) +#define default_hash(a, b) XXH3_64bits(a, b) /* Uninspired gcc plugin instrumentation */ diff --git a/src/afl-performance.c b/src/afl-performance.c index 89b170eb..ff8dfeb6 100644 --- a/src/afl-performance.c +++ b/src/afl-performance.c @@ -90,7 +90,8 @@ inline u32 hash32(u8 *key, u32 len, u32 seed) { #endif - return (u32)XXH64(key, len, seed); + (void)seed; + return (u32)XXH3_64bits(key, len); } @@ -102,7 +103,8 @@ inline u64 hash64(u8 *key, u32 len, u64 seed) { #endif - return XXH64(key, len, seed); + (void)seed; + return XXH3_64bits(key, len); } -- cgit 1.4.1