aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorvan Hauser <vh@thc.org>2021-11-07 14:09:09 +0100
committerGitHub <noreply@github.com>2021-11-07 14:09:09 +0100
commitfb443eaf2372ccd1825699c978fd0d662155fb9e (patch)
treeff019fc0b0704c16d68655d0f3864ec4cda49d30
parent5b06413a5f109f310a62e36111a18d7325b246c3 (diff)
parent2ddbaa439ca78b0ae8cc6691d9657f5783b2d5e8 (diff)
downloadafl++-fb443eaf2372ccd1825699c978fd0d662155fb9e.tar.gz
Merge pull request #1141 from AFLplusplus/afl4
cmplog enhancement variant
-rw-r--r--GNUmakefile2
-rw-r--r--docs/Changelog.md2
-rw-r--r--frida_mode/src/cmplog/cmplog_arm64.c51
-rw-r--r--frida_mode/src/cmplog/cmplog_x64.c50
-rw-r--r--frida_mode/src/cmplog/cmplog_x86.c51
-rw-r--r--include/afl-fuzz.h1
-rw-r--r--include/cmplog.h13
-rw-r--r--include/config.h4
-rw-r--r--include/types.h3
-rw-r--r--include/xxhash.h3084
-rw-r--r--instrumentation/afl-compiler-rt.o.c205
-rw-r--r--instrumentation/cmplog-routines-pass.cc244
-rw-r--r--qemu_mode/QEMUAFL_VERSION2
m---------qemu_mode/qemuafl0
-rw-r--r--src/afl-forkserver.c24
-rw-r--r--src/afl-fuzz-one.c25
-rw-r--r--src/afl-fuzz-queue.c91
-rw-r--r--src/afl-fuzz-redqueen.c363
-rw-r--r--src/afl-fuzz-stats.c3
-rw-r--r--src/afl-fuzz.c19
-rw-r--r--src/afl-performance.c6
-rw-r--r--test/test-cmplog.c21
22 files changed, 2844 insertions, 1420 deletions
diff --git a/GNUmakefile b/GNUmakefile
index ad2642f3..06840786 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -32,7 +32,7 @@ VERSION = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f
# PROGS intentionally omit afl-as, which gets installed elsewhere.
PROGS = afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
-SH_PROGS = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config afl-persistent-config
+SH_PROGS = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config afl-persistent-config afl-cc
MANPAGES=$(foreach p, $(PROGS) $(SH_PROGS), $(p).8) afl-as.8
ASAN_OPTIONS=detect_leaks=0
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 7c77a6bf..2c72b5f2 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -18,6 +18,8 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
- fix -n dumb mode (nobody should use this)
- fix stability issue with LTO and cmplog
- better banner
+ - more effective cmplog mode
+ - more often update the UI when in input2stage mode
- frida_mode: David Carlier added Android support :)
- afl-showmap, afl-tmin and afl-analyze:
- honor persistent mode for more speed. thanks to dloffre-snl for
diff --git a/frida_mode/src/cmplog/cmplog_arm64.c b/frida_mode/src/cmplog/cmplog_arm64.c
index dd97f38d..ccc8e89e 100644
--- a/frida_mode/src/cmplog/cmplog_arm64.c
+++ b/frida_mode/src/cmplog/cmplog_arm64.c
@@ -104,9 +104,9 @@ static void cmplog_call_callout(GumCpuContext *context, gpointer user_data) {
gsize x0 = ctx_read_reg(context, ARM64_REG_X0);
gsize x1 = ctx_read_reg(context, ARM64_REG_X1);
- if (((G_MAXULONG - x0) < 32) || ((G_MAXULONG - x1) < 32)) return;
+ if (((G_MAXULONG - x0) < 31) || ((G_MAXULONG - x1) < 31)) return;
- if (!cmplog_is_readable(x0, 32) || !cmplog_is_readable(x1, 32)) return;
+ if (!cmplog_is_readable(x0, 31) || !cmplog_is_readable(x1, 31)) return;
void *ptr1 = GSIZE_TO_POINTER(x0);
void *ptr2 = GSIZE_TO_POINTER(x1);
@@ -116,18 +116,36 @@ static void cmplog_call_callout(GumCpuContext *context, gpointer user_data) {
k = (k >> 4) ^ (k << 8);
k &= CMP_MAP_W - 1;
- __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+ if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+ __afl_cmp_map->headers[k].hits = 0;
+
+ }
+
+ u32 hits = 0;
+
+ if (__afl_cmp_map->headers[k].hits == 0) {
+
+ __afl_cmp_map->headers[k].shape = 30;
+
+ } else {
+
+ hits = __afl_cmp_map->headers[k].hits;
+
+ }
- u32 hits = __afl_cmp_map->headers[k].hits;
__afl_cmp_map->headers[k].hits = hits + 1;
- __afl_cmp_map->headers[k].shape = 31;
+ __afl_cmp_map->headers[k].shape = 30;
hits &= CMP_MAP_RTN_H - 1;
+ ((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0_len = 31;
+ ((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1_len = 31;
gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0, ptr1,
- 32);
+ 31);
gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1, ptr2,
- 32);
+ 31);
}
@@ -193,12 +211,23 @@ static void cmplog_handle_cmp_sub(GumCpuContext *context, gsize operand1,
k = (k >> 4) ^ (k << 8);
k &= CMP_MAP_W - 1;
- __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+ if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS)
+ __afl_cmp_map->headers[k].hits = 0;
- u32 hits = __afl_cmp_map->headers[k].hits;
- __afl_cmp_map->headers[k].hits = hits + 1;
+ u32 hits = 0;
+
+ if (__afl_cmp_map->headers[k].hits == 0) {
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+ __afl_cmp_map->headers[k].shape = (size - 1);
+
+ } else {
- __afl_cmp_map->headers[k].shape = (size - 1);
+ hits = __afl_cmp_map->headers[k].hits;
+
+ }
+
+ __afl_cmp_map->headers[k].hits = hits + 1;
hits &= CMP_MAP_H - 1;
__afl_cmp_map->log[k][hits].v0 = operand1;
diff --git a/frida_mode/src/cmplog/cmplog_x64.c b/frida_mode/src/cmplog/cmplog_x64.c
index 0d18767a..5319f727 100644
--- a/frida_mode/src/cmplog/cmplog_x64.c
+++ b/frida_mode/src/cmplog/cmplog_x64.c
@@ -99,9 +99,9 @@ static void cmplog_call_callout(GumCpuContext *context, gpointer user_data) {
gsize rdi = ctx_read_reg(context, X86_REG_RDI);
gsize rsi = ctx_read_reg(context, X86_REG_RSI);
- if (((G_MAXULONG - rdi) < 32) || ((G_MAXULONG - rsi) < 32)) return;
+ if (((G_MAXULONG - rdi) < 31) || ((G_MAXULONG - rsi) < 31)) return;
- if (!cmplog_is_readable(rdi, 32) || !cmplog_is_readable(rsi, 32)) return;
+ if (!cmplog_is_readable(rdi, 31) || !cmplog_is_readable(rsi, 31)) return;
void *ptr1 = GSIZE_TO_POINTER(rdi);
void *ptr2 = GSIZE_TO_POINTER(rsi);
@@ -111,18 +111,34 @@ static void cmplog_call_callout(GumCpuContext *context, gpointer user_data) {
k = (k >> 4) ^ (k << 8);
k &= CMP_MAP_W - 1;
- __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+ if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
- u32 hits = __afl_cmp_map->headers[k].hits;
- __afl_cmp_map->headers[k].hits = hits + 1;
+ __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+ __afl_cmp_map->headers[k].hits = 0;
+
+ }
+
+ u32 hits = 0;
+
+ if (__afl_cmp_map->headers[k].hits == 0) {
+
+ __afl_cmp_map->headers[k].shape = 30;
+
+ } else {
+
+ hits = __afl_cmp_map->headers[k].hits;
+
+ }
- __afl_cmp_map->headers[k].shape = 31;
+ __afl_cmp_map->headers[k].hits = hits + 1;
hits &= CMP_MAP_RTN_H - 1;
+ ((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0_len = 31;
+ ((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1_len = 31;
gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0, ptr1,
- 32);
+ 31);
gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1, ptr2,
- 32);
+ 31);
}
@@ -179,13 +195,23 @@ static void cmplog_handle_cmp_sub(GumCpuContext *context, gsize operand1,
k = (k >> 4) ^ (k << 8);
k &= CMP_MAP_W - 7;
- __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+ if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS)
+ __afl_cmp_map->headers[k].hits = 0;
- u32 hits = __afl_cmp_map->headers[k].hits;
- __afl_cmp_map->headers[k].hits = hits + 1;
+ u32 hits = 0;
+
+ if (__afl_cmp_map->headers[k].hits == 0) {
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+ __afl_cmp_map->headers[k].shape = (size - 1);
- __afl_cmp_map->headers[k].shape = (size - 1);
+ } else {
+ hits = __afl_cmp_map->headers[k].hits;
+
+ }
+
+ __afl_cmp_map->headers[k].hits = hits + 1;
hits &= CMP_MAP_H - 1;
__afl_cmp_map->log[k][hits].v0 = operand1;
__afl_cmp_map->log[k][hits].v1 = operand2;
diff --git a/frida_mode/src/cmplog/cmplog_x86.c b/frida_mode/src/cmplog/cmplog_x86.c
index dd666c34..27d06720 100644
--- a/frida_mode/src/cmplog/cmplog_x86.c
+++ b/frida_mode/src/cmplog/cmplog_x86.c
@@ -104,9 +104,9 @@ static void cmplog_call_callout(GumCpuContext *context, gpointer user_data) {
gsize arg1 = esp[0];
gsize arg2 = esp[1];
- if (((G_MAXULONG - arg1) < 32) || ((G_MAXULONG - arg2) < 32)) return;
+ if (((G_MAXULONG - arg1) < 31) || ((G_MAXULONG - arg2) < 31)) return;
- if (!cmplog_is_readable(arg1, 32) || !cmplog_is_readable(arg2, 32)) return;
+ if (!cmplog_is_readable(arg1, 31) || !cmplog_is_readable(arg2, 31)) return;
void *ptr1 = GSIZE_TO_POINTER(arg1);
void *ptr2 = GSIZE_TO_POINTER(arg2);
@@ -116,18 +116,34 @@ static void cmplog_call_callout(GumCpuContext *context, gpointer user_data) {
k = (k >> 4) ^ (k << 8);
k &= CMP_MAP_W - 1;
- __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+ if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
- u32 hits = __afl_cmp_map->headers[k].hits;
- __afl_cmp_map->headers[k].hits = hits + 1;
+ __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+ __afl_cmp_map->headers[k].hits = 0;
+
+ }
+
+ u32 hits = 0;
+
+ if (__afl_cmp_map->headers[k].hits == 0) {
- __afl_cmp_map->headers[k].shape = 31;
+ __afl_cmp_map->headers[k].shape = 30;
+
+ } else {
+
+ hits = __afl_cmp_map->headers[k].hits;
+
+ }
+
+ __afl_cmp_map->headers[k].hits = hits + 1;
hits &= CMP_MAP_RTN_H - 1;
+ ((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0_len = 31;
+ ((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1_len = 31;
gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0, ptr1,
- 32);
+ 31);
gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1, ptr2,
- 32);
+ 31);
}
@@ -184,12 +200,23 @@ static void cmplog_handle_cmp_sub(GumCpuContext *context, gsize operand1,
k = (k >> 4) ^ (k << 8);
k &= CMP_MAP_W - 1;
- __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+ if (__afl_cmp_map->headers[k].type != CMP_TYPE_INS)
+ __afl_cmp_map->headers[k].hits = 0;
- u32 hits = __afl_cmp_map->headers[k].hits;
- __afl_cmp_map->headers[k].hits = hits + 1;
+ u32 hits = 0;
+
+ if (__afl_cmp_map->headers[k].hits == 0) {
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+ __afl_cmp_map->headers[k].shape = (size - 1);
+
+ } else {
- __afl_cmp_map->headers[k].shape = (size - 1);
+ hits = __afl_cmp_map->headers[k].hits;
+
+ }
+
+ __afl_cmp_map->headers[k].hits = hits + 1;
hits &= CMP_MAP_H - 1;
__afl_cmp_map->log[k][hits].v0 = operand1;
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index e73ea1a4..f3d6d99d 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -1135,6 +1135,7 @@ void setup_signal_handlers(void);
void save_cmdline(afl_state_t *, u32, char **);
void read_foreign_testcases(afl_state_t *, int);
void write_crash_readme(afl_state_t *afl);
+u8 check_if_text_buf(u8 *buf, u32 len);
/* CmpLog */
diff --git a/include/cmplog.h b/include/cmplog.h
index 1c15d2b8..8778a4b6 100644
--- a/include/cmplog.h
+++ b/include/cmplog.h
@@ -48,7 +48,8 @@ struct cmp_header {
unsigned shape : 5;
unsigned type : 2;
unsigned attribute : 4;
- unsigned reserved : 5;
+ unsigned overflow : 1;
+ unsigned reserved : 4;
} __attribute__((packed));
@@ -59,14 +60,16 @@ struct cmp_operands {
u64 v0_128;
u64 v1_128;
-};
+} __attribute__((packed));
struct cmpfn_operands {
- u8 v0[32];
- u8 v1[32];
+ u8 v0[31];
+ u8 v0_len;
+ u8 v1[31];
+ u8 v1_len;
-};
+} __attribute__((packed));
typedef struct cmp_operands cmp_map_list[CMP_MAP_H];
diff --git a/include/config.h b/include/config.h
index 3aee9b00..b787152f 100644
--- a/include/config.h
+++ b/include/config.h
@@ -267,8 +267,8 @@
(first value), and to keep in memory as candidates. The latter should be much
higher than the former. */
-#define USE_AUTO_EXTRAS 128
-#define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 64)
+#define USE_AUTO_EXTRAS 4096
+#define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 8)
/* Scaling factor for the effector map used to skip some of the more
expensive deterministic steps. The actual divisor is set to
diff --git a/include/types.h b/include/types.h
index e945f0f5..bbcc2f81 100644
--- a/include/types.h
+++ b/include/types.h
@@ -46,6 +46,8 @@ typedef uint128_t u128;
#define FS_ERROR_SHM_OPEN 4
#define FS_ERROR_SHMAT 8
#define FS_ERROR_MMAP 16
+#define FS_ERROR_OLD_CMPLOG 32
+#define FS_ERROR_OLD_CMPLOG_QEMU 64
/* Reporting options */
#define FS_OPT_ENABLED 0x80000001
@@ -53,6 +55,7 @@ typedef uint128_t u128;
#define FS_OPT_SNAPSHOT 0x20000000
#define FS_OPT_AUTODICT 0x10000000
#define FS_OPT_SHDMEM_FUZZ 0x01000000
+#define FS_OPT_NEWCMPLOG 0x02000000
#define FS_OPT_OLD_AFLPP_WORKAROUND 0x0f000000
// FS_OPT_MAX_MAPSIZE is 8388608 = 0x800000 = 2^23 = 1 << 22
#define FS_OPT_MAX_MAPSIZE ((0x00fffffeU >> 1) + 1)
diff --git a/include/xxhash.h b/include/xxhash.h
index 006d3f3d..0ca2b852 100644
--- a/include/xxhash.h
+++ b/include/xxhash.h
@@ -32,7 +32,12 @@
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
-
+/*!
+ * @mainpage xxHash
+ *
+ * @file xxhash.h
+ * xxHash prototypes and implementation
+ */
/* TODO: update */
/* Notice extracted from xxHash homepage:
@@ -45,7 +50,7 @@ Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo
Name Speed Q.Score Author
xxHash 5.4 GB/s 10
CrapWow 3.2 GB/s 2 Andrew
-MumurHash 3a 2.7 GB/s 10 Austin Appleby
+MurmurHash 3a 2.7 GB/s 10 Austin Appleby
SpookyHash 2.0 GB/s 10 Bob Jenkins
SBox 1.4 GB/s 9 Bret Mulvey
Lookup3 1.2 GB/s 9 Bob Jenkins
@@ -119,29 +124,78 @@ extern "C" {
/*
* This part deals with the special case where a unit wants to inline xxHash,
- * but "xxhash.h" has previously been included without XXH_INLINE_ALL, such
- * as part of some previously included *.h header file.
+ * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
+ * such as part of some previously included *.h header file.
* Without further action, the new include would just be ignored,
* and functions would effectively _not_ be inlined (silent failure).
* The following macros solve this situation by prefixing all inlined names,
* avoiding naming collision with previous inclusions.
*/
- #ifdef XXH_NAMESPACE
- #error "XXH_INLINE_ALL with XXH_NAMESPACE is not supported"
- /*
- * Note: Alternative: #undef all symbols (it's a pretty large list).
- * Without #error: it compiles, but functions are actually not inlined.
- */
- #endif
+/* Before that, we unconditionally #undef all symbols,
+ * in case they were already defined with XXH_NAMESPACE.
+ * They will then be redefined for XXH_INLINE_ALL
+ */
+ #undef XXH_versionNumber
+/* XXH32 */
+ #undef XXH32
+ #undef XXH32_createState
+ #undef XXH32_freeState
+ #undef XXH32_reset
+ #undef XXH32_update
+ #undef XXH32_digest
+ #undef XXH32_copyState
+ #undef XXH32_canonicalFromHash
+ #undef XXH32_hashFromCanonical
+/* XXH64 */
+ #undef XXH64
+ #undef XXH64_createState
+ #undef XXH64_freeState
+ #undef XXH64_reset
+ #undef XXH64_update
+ #undef XXH64_digest
+ #undef XXH64_copyState
+ #undef XXH64_canonicalFromHash
+ #undef XXH64_hashFromCanonical
+/* XXH3_64bits */
+ #undef XXH3_64bits
+ #undef XXH3_64bits_withSecret
+ #undef XXH3_64bits_withSeed
+ #undef XXH3_createState
+ #undef XXH3_freeState
+ #undef XXH3_copyState
+ #undef XXH3_64bits_reset
+ #undef XXH3_64bits_reset_withSeed
+ #undef XXH3_64bits_reset_withSecret
+ #undef XXH3_64bits_update
+ #undef XXH3_64bits_digest
+ #undef XXH3_generateSecret
+/* XXH3_128bits */
+ #undef XXH128
+ #undef XXH3_128bits
+ #undef XXH3_128bits_withSeed
+ #undef XXH3_128bits_withSecret
+ #undef XXH3_128bits_reset
+ #undef XXH3_128bits_reset_withSeed
+ #undef XXH3_128bits_reset_withSecret
+ #undef XXH3_128bits_update
+ #undef XXH3_128bits_digest
+ #undef XXH128_isEqual
+ #undef XXH128_cmp
+ #undef XXH128_canonicalFromHash
+ #undef XXH128_hashFromCanonical
+/* Finally, free the namespace itself */
+ #undef XXH_NAMESPACE
+
+/* employ the namespace for XXH_INLINE_ALL */
#define XXH_NAMESPACE XXH_INLINE_
/*
- * Some identifiers (enums, type names) are not symbols, but they must
- * still be renamed to avoid redeclaration.
+ * Some identifiers (enums, type names) are not symbols,
+ * but they must nonetheless be renamed to avoid redeclaration.
* Alternative solution: do not redeclare them.
- * However, this requires some #ifdefs, and is a more dispersed action.
- * Meanwhile, renaming can be achieved in a single block
+ * However, this requires some #ifdefs, and has a more dispersed impact.
+ * Meanwhile, renaming can be achieved in a single place.
*/
- #define XXH_IPREF(Id) XXH_INLINE_##Id
+ #define XXH_IPREF(Id) XXH_NAMESPACE##Id
#define XXH_OK XXH_IPREF(XXH_OK)
#define XXH_ERROR XXH_IPREF(XXH_ERROR)
#define XXH_errorcode XXH_IPREF(XXH_errorcode)
@@ -166,6 +220,12 @@ extern "C" {
#ifndef XXHASH_H_5627135585666179
#define XXHASH_H_5627135585666179 1
+ /*!
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+
+ */
/* specific declaration modes for Windows */
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
#if defined(WIN32) && defined(_MSC_VER) && \
@@ -180,19 +240,24 @@ extern "C" {
#endif
#endif
- /*!
- * XXH_NAMESPACE, aka Namespace Emulation:
- *
- * If you want to include _and expose_ xxHash functions from within your own
- * library, but also want to avoid symbol collisions with other libraries
- * which may also include xxHash, you can use XXH_NAMESPACE to automatically
- * prefix any public symbol from xxhash library with the value of
- * XXH_NAMESPACE (therefore, avoid empty or numeric values).
- *
- * Note that no change is required within the calling program as long as it
- * includes `xxhash.h`: Regular symbol names will be automatically translated
- * by this header.
- */
+ #ifdef XXH_DOXYGEN
+ /*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries
+ * which may also include xxHash, you can use XXH_NAMESPACE to automatically
+ * prefix any public symbol from xxhash library with the value of
+ * XXH_NAMESPACE (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: Regular symbol names will be automatically
+ * translated by this header.
+ */
+ #define XXH_NAMESPACE /* YOUR NAME HERE */
+ #undef XXH_NAMESPACE
+ #endif
+
#ifdef XXH_NAMESPACE
#define XXH_CAT(A, B) A##B
#define XXH_NAME2(A, B) XXH_CAT(A, B)
@@ -264,10 +329,19 @@ extern "C" {
***************************************/
#define XXH_VERSION_MAJOR 0
#define XXH_VERSION_MINOR 8
- #define XXH_VERSION_RELEASE 0
+ #define XXH_VERSION_RELEASE 1
#define XXH_VERSION_NUMBER \
(XXH_VERSION_MAJOR * 100 * 100 + XXH_VERSION_MINOR * 100 + \
XXH_VERSION_RELEASE)
+
+/*!
+ * @brief Obtains the xxHash version.
+ *
+ * This is only useful when xxHash is compiled as a shared library, as it is
+ * independent of the version defined in the header.
+ *
+ * @return `XXH_VERSION_NUMBER` as of when the library was compiled.
+ */
XXH_PUBLIC_API unsigned XXH_versionNumber(void);
/* ****************************
@@ -279,15 +353,24 @@ typedef enum { XXH_OK = 0, XXH_ERROR } XXH_errorcode;
/*-**********************************************************************
* 32-bit hash
************************************************************************/
- #if !defined(__VMS) && \
+ #if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
+/*!
+ * @brief An unsigned 32-bit integer.
+ *
+ * Not necessarily defined to `uint32_t` but functionally equivalent.
+ */
+typedef uint32_t XXH32_hash_t;
+
+ #elif !defined(__VMS) && \
(defined(__cplusplus) || \
(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
#include <stdint.h>
-typedef uint32_t XXH32_hash_t;
+typedef uint32_t XXH32_hash_t;
+
#else
#include <limits.h>
#if UINT_MAX == 0xFFFFFFFFUL
-typedef unsigned int XXH32_hash_t;
+typedef unsigned int XXH32_hash_t;
#else
#if ULONG_MAX == 0xFFFFFFFFUL
typedef unsigned long XXH32_hash_t;
@@ -298,24 +381,52 @@ typedef unsigned long XXH32_hash_t;
#endif
/*!
- * XXH32():
- * Calculate the 32-bit hash of sequence "length" bytes stored at memory
- * address "input". The memory between input & input+length must be valid
- * (allocated and read-accessible). "seed" can be used to alter the result
- * predictably. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher
- * benchmark): 5.4 GB/s
- *
- * Note: XXH3 provides competitive speed for both 32-bit and 64-bit systems,
- * and offers true 64/128 bit hash results. It provides a superior level of
- * dispersion, and greatly reduces the risks of collisions.
+ * @}
+ *
+ * @defgroup xxh32_family XXH32 family
+ * @ingroup public
+ * Contains functions used in the classic 32-bit xxHash algorithm.
+ *
+ * @note
+ * XXH32 is considered rather weak by today's standards.
+ * The @ref xxh3_family provides competitive speed for both 32-bit and 64-bit
+ * systems, and offers true 64/128 bit hash results. It provides a superior
+ * level of dispersion, and greatly reduces the risks of collisions.
+ *
+ * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
+ * @see @ref xxh32_impl for implementation details
+ * @{
+
+ */
+
+/*!
+ * @brief Calculates the 32-bit hash of @p input using xxHash32.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in
+ * size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 32-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 32-bit hash value.
+ *
+ * @see
+ * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ * Direct equivalents for the other variants of xxHash.
+ * @see
+ * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
*/
XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t length,
XXH32_hash_t seed);
-/******* Streaming *******/
-
-/*
- * Streaming functions generate the xxHash value from an incrememtal input.
+/*!
+ * Streaming functions generate the xxHash value from an incremental input.
* This method is slower than single-call functions, due to state management.
* For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
*
@@ -336,19 +447,125 @@ XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t length,
* digest, and generate new hash values later on by invoking `XXH*_digest()`.
*
* When done, release the state using `XXH*_freeState()`.
+ *
+ * Example code for incrementally hashing a file:
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <xxhash.h>
+ * #define BUFFER_SIZE 256
+ *
+ * // Note: XXH64 and XXH3 use the same interface.
+ * XXH32_hash_t
+ * hashFile(FILE* stream)
+ * {
+
+ * XXH32_state_t* state;
+ * unsigned char buf[BUFFER_SIZE];
+ * size_t amt;
+ * XXH32_hash_t hash;
+ *
+ * state = XXH32_createState(); // Create a state
+ * assert(state != NULL); // Error check here
+ * XXH32_reset(state, 0xbaad5eed); // Reset state with our seed
+ * while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
+
+ * XXH32_update(state, buf, amt); // Hash the file in chunks
+ * }
+ * hash = XXH32_digest(state); // Finalize the hash
+ * XXH32_freeState(state); // Clean up
+ * return hash;
+ * }
+ * @endcode
+ */
+
+/*!
+ * @typedef struct XXH32_state_s XXH32_state_t
+ * @brief The opaque state struct for the XXH32 streaming API.
+ *
+ * @see XXH32_state_s for details.
*/
+typedef struct XXH32_state_s XXH32_state_t;
-typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
+/*!
+ * @brief Allocates an @ref XXH32_state_t.
+ *
+ * Must be freed with XXH32_freeState().
+ * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ */
XXH_PUBLIC_API XXH32_state_t *XXH32_createState(void);
-XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr);
-XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t * dst_state,
- const XXH32_state_t *src_state);
+/*!
+ * @brief Frees an @ref XXH32_state_t.
+ *
+ * Must be allocated with XXH32_createState().
+ * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref
+ * XXH32_createState().
+ * @return XXH_OK.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr);
+/*!
+ * @brief Copies one @ref XXH32_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ * @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t * dst_state,
+ const XXH32_state_t *src_state);
+/*!
+ * @brief Resets an @ref XXH32_state_t to begin a new hash.
+ *
+ * This function resets and seeds a state. Call it before @ref XXH32_update().
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 32-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t *statePtr,
XXH32_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH32_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in
+ * size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ */
XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t *statePtr,
const void *input, size_t length);
-XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *statePtr);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH32_state_t.
+ *
+ * @note
+ * Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ * digest, and update again.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ * @p statePtr must not be `NULL`.
+ *
+ * @return The calculated xxHash32 value from that state.
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *statePtr);
/******* Canonical representation *******/
@@ -373,48 +590,158 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *statePtr);
* canonical format.
*/
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
+ */
typedef struct {
- unsigned char digest[4];
+ unsigned char digest[4]; /*!< Hash bytes, big endian */
} XXH32_canonical_t;
+/*!
+ * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
+ *
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH32_hash_t to be converted.
+ *
+ * @pre
+ * @p dst must not be `NULL`.
+ */
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t *dst,
XXH32_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
+ *
+ * @param src The @ref XXH32_canonical_t to convert.
+ *
+ * @pre
+ * @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ */
XXH_PUBLIC_API XXH32_hash_t
XXH32_hashFromCanonical(const XXH32_canonical_t *src);
+ #ifdef __has_attribute
+ #define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
+ #else
+ #define XXH_HAS_ATTRIBUTE(x) 0
+ #endif
+
+ /* C-language Attributes are added in C23. */
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && \
+ defined(__has_c_attribute)
+ #define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+ #else
+ #define XXH_HAS_C_ATTRIBUTE(x) 0
+ #endif
+
+ #if defined(__cplusplus) && defined(__has_cpp_attribute)
+ #define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+ #else
+ #define XXH_HAS_CPP_ATTRIBUTE(x) 0
+ #endif
+
+ /*
+ Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough'
+ attribute introduced in CPP17 and C23. CPP17 :
+ https://en.cppreference.com/w/cpp/language/attributes/fallthrough C23 :
+ https://en.cppreference.com/w/c/language/attributes/fallthrough
+ */
+ #if XXH_HAS_C_ATTRIBUTE(x)
+ #define XXH_FALLTHROUGH [[fallthrough]]
+ #elif XXH_HAS_CPP_ATTRIBUTE(x)
+ #define XXH_FALLTHROUGH [[fallthrough]]
+ #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
+ #define XXH_FALLTHROUGH __attribute__((fallthrough))
+ #else
+ #define XXH_FALLTHROUGH
+ #endif
+
+/*!
+ * @}
+ * @ingroup public
+ * @{
+
+ */
+
#ifndef XXH_NO_LONG_LONG
/*-**********************************************************************
* 64-bit hash
************************************************************************/
- #if !defined(__VMS) && \
+ #if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
+/*!
+ * @brief An unsigned 64-bit integer.
+ *
+ * Not necessarily defined to `uint64_t` but functionally equivalent.
+ */
+typedef uint64_t XXH64_hash_t;
+ #elif !defined(__VMS) && \
(defined(__cplusplus) || (defined(__STDC_VERSION__) && \
(__STDC_VERSION__ >= 199901L) /* C99 */))
#include <stdint.h>
typedef uint64_t XXH64_hash_t;
#else
+ #include <limits.h>
+ #if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
+/* LP64 ABI says uint64_t is unsigned long */
+typedef unsigned long XXH64_hash_t;
+ #else
/* the following type must have a width of 64-bit */
typedef unsigned long long XXH64_hash_t;
+ #endif
#endif
/*!
- * XXH64():
- * Returns the 64-bit hash of sequence of length @length stored at memory
- * address @input.
- * @seed can be used to alter the result predictably.
+ * @}
+ *
+ * @defgroup xxh64_family XXH64 family
+ * @ingroup public
+ * @{
+
+ * Contains functions used in the classic 64-bit xxHash algorithm.
+ *
+ * @note
+ * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
+ * and offers true 64/128 bit hash results. It provides a superior level of
+ * dispersion, and greatly reduces the risks of collisions.
+ */
+
+/*!
+ * @brief Calculates the 64-bit hash of @p input using xxHash64.
*
* This function usually runs faster on 64-bit systems, but slower on 32-bit
* systems (see benchmark).
*
- * Note: XXH3 provides competitive speed for both 32-bit and 64-bit systems,
- * and offers true 64/128 bit hash results. It provides a superior level of
- * dispersion, and greatly reduces the risks of collisions.
+ * @param input The block of data to be hashed, at least @p length bytes in
+ * size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ * The memory between @p input and @p input + @p length must be valid,
+ * readable, contiguous memory. However, if @p length is `0`, @p input may be
+ * `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit hash.
+ *
+ * @see
+ * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
+ * Direct equivalents for the other variants of xxHash.
+ * @see
+ * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
*/
XXH_PUBLIC_API XXH64_hash_t XXH64(const void *input, size_t length,
XXH64_hash_t seed);
/******* Streaming *******/
+/*!
+ * @brief The opaque state struct for the XXH64 streaming API.
+ *
+ * @see XXH64_state_s for details.
+ */
typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
XXH_PUBLIC_API XXH64_state_t *XXH64_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr);
@@ -439,12 +766,15 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t *dst,
XXH_PUBLIC_API XXH64_hash_t
XXH64_hashFromCanonical(const XXH64_canonical_t *src);
-/*-**********************************************************************
- * XXH3 64-bit variant
- ************************************************************************/
+/*!
+ * @}
+ * ************************************************************************
+ * @defgroup xxh3_family XXH3 family
+ * @ingroup public
+ * @{
-/* ************************************************************************
- * XXH3 is a new hash algorithm featuring:
+ *
+ * XXH3 is a more recent hash algorithm featuring:
* - Improved speed for both small and large inputs
* - True 64-bit and 128-bit outputs
* - SIMD acceleration
@@ -454,41 +784,38 @@ XXH64_hashFromCanonical(const XXH64_canonical_t *src);
*
* https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
*
- * In general, expect XXH3 to run about ~2x faster on large inputs and >3x
- * faster on small ones compared to XXH64, though exact differences depend on
- * the platform.
+ * Compared to XXH64, expect XXH3 to run approximately
+ * ~2x faster on large inputs and >3x faster on small ones,
+ * exact differences vary depending on platform.
*
- * The algorithm is portable: Like XXH32 and XXH64, it generates the same hash
- * on all platforms.
- *
- * It benefits greatly from SIMD and 64-bit arithmetic, but does not require it.
- *
- * Almost all 32-bit and 64-bit targets that can run XXH32 smoothly can run
- * XXH3 at competitive speeds, even if XXH64 runs slowly. Further details are
- * explained in the implementation.
+ * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
+ * but does not require it.
+ * Any 32-bit and 64-bit targets that can run XXH32 smoothly
+ * can run XXH3 at competitive speeds, even without vector support.
+ * Further details are explained in the implementation.
*
* Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
- * ZVector and scalar targets. This can be controlled with the XXH_VECTOR macro.
+ * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
+ *
+ * XXH3 implementation is portable:
+ * it has a generic C90 formulation that can be compiled on any platform,
+ * all implementations generate exactly the same hash value on all platforms.
+ * Starting from v0.8.0, it's also labelled "stable", meaning that
+ * any future version will also generate the same hash value.
*
* XXH3 offers 2 variants, _64bits and _128bits.
- * When only 64 bits are needed, prefer calling the _64bits variant, as it
- * reduces the amount of mixing, resulting in faster speed on small inputs.
*
+ * When only 64 bits are needed, prefer invoking the _64bits variant, as it
+ * reduces the amount of mixing, resulting in faster speed on small inputs.
* It's also generally simpler to manipulate a scalar return type than a struct.
*
- * The 128-bit version adds additional strength, but it is slightly slower.
- *
- * Return values of XXH3 and XXH128 are officially finalized starting
- * with v0.8.0 and will no longer change in future versions.
- * Avoid storing values from before that release in long-term storage.
- *
- * Results produced by v0.7.x are not comparable with results from v0.7.y.
- * However, the API is completely stable, and it can safely be used for
- * ephemeral data (local sessions).
- *
* The API supports one-shot hashing, streaming mode, and custom secrets.
*/
+/*-**********************************************************************
+ * XXH3 64-bit variant
+ ************************************************************************/
+
/* XXH3_64bits():
* default 64-bit variant, using default secret and default seed of 0.
* It's the fastest variant. */
@@ -504,20 +831,28 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void *data, size_t len);
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void *data, size_t len,
XXH64_hash_t seed);
- /*
- * XXH3_64bits_withSecret():
- * It's possible to provide any blob of bytes as a "secret" to generate the
- * hash. This makes it more difficult for an external actor to prepare an
- * intentional collision. The main condition is that secretSize *must* be
- * large enough (>= XXH3_SECRET_SIZE_MIN). However, the quality of produced
- * hash values depends on secret's entropy. Technically, the secret must
- * look like a bunch of random bytes. Avoid "trivial" or structured data
- * such as repeated sequences or a text document. Whenever unsure about the
- * "randomness" of the blob of bytes, consider relabelling it as a "custom
- * seed" instead, and employ "XXH3_generateSecret()" (see below) to generate
- * a high entropy secret derived from the custom seed.
+ /*!
+ * The bare minimum size for a custom secret.
+ *
+ * @see
+ * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
+ * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
*/
#define XXH3_SECRET_SIZE_MIN 136
+
+/*
+ * XXH3_64bits_withSecret():
+ * It's possible to provide any blob of bytes as a "secret" to generate the
+ * hash. This makes it more difficult for an external actor to prepare an
+ * intentional collision. The main condition is that secretSize *must* be large
+ * enough (>= XXH3_SECRET_SIZE_MIN). However, the quality of produced hash
+ * values depends on secret's entropy. Technically, the secret must look like a
+ * bunch of random bytes. Avoid "trivial" or structured data such as repeated
+ * sequences or a text document. Whenever unsure about the "randomness" of the
+ * blob of bytes, consider relabelling it as a "custom seed" instead, and employ
+ * "XXH3_generateSecret()" (see below) to generate a high entropy secret derived
+ * from the custom seed.
+ */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void *data, size_t len,
const void *secret,
size_t secretSize);
@@ -529,6 +864,12 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void *data, size_t len,
* As a consequence, streaming is slower than one-shot hashing.
* For better performance, prefer one-shot functions whenever applicable.
*/
+
+/*!
+ * @brief The state struct for the XXH3 streaming API.
+ *
+ * @see XXH3_state_s for details.
+ */
typedef struct XXH3_state_s XXH3_state_t;
XXH_PUBLIC_API XXH3_state_t *XXH3_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t *statePtr);
@@ -572,10 +913,16 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(const XXH3_state_t *statePtr);
* XXH3 128-bit variant
************************************************************************/
+/*!
+ * @brief The return value from 128-bit hashes.
+ *
+ * Stored in little endian order, although the fields themselves are in native
+ * endianness.
+ */
typedef struct {
- XXH64_hash_t low64;
- XXH64_hash_t high64;
+ XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */
+ XXH64_hash_t high64; /*!< `value >> 64` */
} XXH128_hash_t;
@@ -649,6 +996,9 @@ XXH128_hashFromCanonical(const XXH128_canonical_t *src);
#endif /* XXH_NO_LONG_LONG */
+/*!
+ * @}
+ */
#endif /* XXHASH_H_5627135585666179 */
#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
@@ -660,7 +1010,7 @@ XXH128_hashFromCanonical(const XXH128_canonical_t *src);
* These declarations should only be used with static linking.
* Never use them in association with dynamic linking!
*****************************************************************************
- */
+*/
/*
* These definitions are only present to allow static allocation
@@ -668,41 +1018,72 @@ XXH128_hashFromCanonical(const XXH128_canonical_t *src);
* Never **ever** access their members directly.
*/
+/*!
+ * @internal
+ * @brief Structure for XXH32 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH32_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH64_state_s, XXH3_state_s
+ */
struct XXH32_state_s {
- XXH32_hash_t total_len_32;
- XXH32_hash_t large_len;
- XXH32_hash_t v1;
- XXH32_hash_t v2;
- XXH32_hash_t v3;
- XXH32_hash_t v4;
- XXH32_hash_t mem32[4];
- XXH32_hash_t memsize;
- XXH32_hash_t
- reserved; /* never read nor write, might be removed in a future version */
+ XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
+ XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref
+ total_len_32 overflow) */
+ XXH32_hash_t v1; /*!< First accumulator lane */
+ XXH32_hash_t v2; /*!< Second accumulator lane */
+ XXH32_hash_t v3; /*!< Third accumulator lane */
+ XXH32_hash_t v4; /*!< Fourth accumulator lane */
+ XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as
+ unsigned char[16]. */
+ XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */
+ XXH32_hash_t reserved; /*!< Reserved field. Do not read or write to it, it may
+ be removed. */
}; /* typedef'd to XXH32_state_t */
#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
+/*!
+ * @internal
+ * @brief Structure for XXH64 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH64_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH32_state_s, XXH3_state_s
+ */
struct XXH64_state_s {
- XXH64_hash_t total_len;
- XXH64_hash_t v1;
- XXH64_hash_t v2;
- XXH64_hash_t v3;
- XXH64_hash_t v4;
- XXH64_hash_t mem64[4];
- XXH32_hash_t memsize;
- XXH32_hash_t reserved32; /* required for padding anyway */
- XXH64_hash_t reserved64; /* never read nor write, might be removed in a future
- version */
+ XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
+ XXH64_hash_t v1; /*!< First accumulator lane */
+ XXH64_hash_t v2; /*!< Second accumulator lane */
+ XXH64_hash_t v3; /*!< Third accumulator lane */
+ XXH64_hash_t v4; /*!< Fourth accumulator lane */
+ XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as
+ unsigned char[32]. */
+ XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */
+  XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyway. */
+ XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it, it
+ may be removed. */
}; /* typedef'd to XXH64_state_t */
- #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 \
+ */
#include <stdalign.h>
#define XXH_ALIGN(n) alignas(n)
+ #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
+ /* In C++ alignas() is a keyword */
+ #define XXH_ALIGN(n) alignas(n)
#elif defined(__GNUC__)
#define XXH_ALIGN(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER)
@@ -713,39 +1094,94 @@ struct XXH64_state_s {
/* Old GCC versions only accept the attribute after the type in structures.
*/
- #if !(defined(__STDC_VERSION__) && \
- (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
+ #if !(defined(__STDC_VERSION__) && \
+ (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
+ && !(defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
&& defined(__GNUC__)
#define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
#else
#define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
#endif
+ /*!
+ * @brief The size of the internal XXH3 buffer.
+ *
+ * This is the optimal update size for incremental hashing.
+ *
+ * @see XXH3_64b_update(), XXH3_128b_update().
+ */
#define XXH3_INTERNALBUFFER_SIZE 256
+
+ /*!
+ * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
+ *
+ * This is the size used in @ref XXH3_kSecret and the seeded functions.
+ *
+ * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
+ */
#define XXH3_SECRET_DEFAULT_SIZE 192
+
+/*!
+ * @internal
+ * @brief Structure for XXH3 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
+ * Otherwise it is an opaque type.
+ * Never use this definition in combination with a dynamic library.
+ * This allows fields to safely be changed in the future.
+ *
+ * @note ** This structure has a strict alignment requirement of 64 bytes!! **
+ * Do not allocate this with `malloc()` or `new`,
+ * it will not be sufficiently aligned.
+ * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
+ *
+ * Typedef'd to @ref XXH3_state_t.
+ * Never access the members of this struct directly.
+ *
+ * @see XXH3_INITSTATE() for stack initialization.
+ * @see XXH3_createState(), XXH3_freeState().
+ * @see XXH32_state_s, XXH64_state_s
+ */
struct XXH3_state_s {
XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
- /* used to store a custom secret generated from a seed */
+ /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref
+ * XXH64_state_s */
XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
+ /*!< Used to store a custom secret generated from a seed. */
XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
- XXH32_hash_t bufferedSize;
- XXH32_hash_t reserved32;
- size_t nbStripesSoFar;
- XXH64_hash_t totalLen;
- size_t nbStripesPerBlock;
- size_t secretLimit;
- XXH64_hash_t seed;
- XXH64_hash_t reserved64;
- const unsigned char *extSecret; /* reference to external secret;
- * if == NULL, use .customSecret instead */
+ /*!< The internal buffer. @see XXH32_state_s::mem32 */
+ XXH32_hash_t bufferedSize;
+ /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
+ XXH32_hash_t reserved32;
+ /*!< Reserved field. Needed for padding on 64-bit. */
+ size_t nbStripesSoFar;
+  /*!< Number of stripes processed. */
+ XXH64_hash_t totalLen;
+ /*!< Total length hashed. 64-bit even on 32-bit targets. */
+ size_t nbStripesPerBlock;
+ /*!< Number of stripes per block. */
+ size_t secretLimit;
+ /*!< Size of @ref customSecret or @ref extSecret */
+ XXH64_hash_t seed;
+ /*!< Seed for _withSeed variants. Must be zero otherwise, @see
+ * XXH3_INITSTATE() */
+ XXH64_hash_t reserved64;
+ /*!< Reserved field. */
+ const unsigned char *extSecret;
+ /*!< Reference to an external secret for the _withSecret variants, NULL
+ * for other variants. */
/* note: there may be some padding at the end due to alignment on 64 bytes */
}; /* typedef'd to XXH3_state_t */
#undef XXH_ALIGN_MEMBER
- /* When the XXH3_state_t structure is merely emplaced on stack,
+ /*!
+ * @brief Initializes a stack-allocated `XXH3_state_s`.
+ *
+ * When the @ref XXH3_state_t structure is merely emplaced on stack,
* it should be initialized with XXH3_INITSTATE() or a memset()
* in case its first reset uses XXH3_NNbits_reset_withSeed().
* This init can be omitted if the first reset uses default or _withSecret
@@ -802,7 +1238,6 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *data, size_t len,
XXH64_hash_t seed);
#endif /* XXH_NO_LONG_LONG */
-
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
#define XXH_IMPLEMENTATION
#endif
@@ -844,81 +1279,183 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *data, size_t len,
/* *************************************
* Tuning parameters
***************************************/
+
/*!
- * XXH_FORCE_MEMORY_ACCESS:
- * By default, access to unaligned memory is controlled by `memcpy()`, which
- * is safe and portable.
- *
- * Unfortunately, on some target/compiler combinations, the generated assembly
- * is sub-optimal.
+ * @defgroup tuning Tuning parameters
+ * @{
+
*
- * The below switch allow selection of a different access method
- * in the search for improved performance.
- * Method 0 (default):
- * Use `memcpy()`. Safe and portable. Default.
- * Method 1:
- * `__attribute__((packed))` statement. It depends on compiler extensions
- * and is therefore not portable.
- * This method is safe if your compiler supports it, and *generally* as
- * fast or faster than `memcpy`.
- * Method 2:
- * Direct access via cast. This method doesn't depend on the compiler but
- * violates the C standard.
- * It can generate buggy code on targets which do not support unaligned
- * memory accesses.
- * But in some circumstances, it's the only known way to get the most
- * performance (example: GCC + ARMv6)
- * Method 3:
- * Byteshift. This can generate the best code on old compilers which don't
- * inline small `memcpy()` calls, and it might also be faster on
- * big-endian systems which lack a native byteswap instruction. See
- * https://stackoverflow.com/a/32095106/646947 for details. Prefer these
- * methods in priority order (0 > 1 > 2 > 3)
+ * Various macros to control xxHash's behavior.
*/
+ #ifdef XXH_DOXYGEN
+ /*!
+ * @brief Define this to disable 64-bit code.
+ *
+ * Useful if only using the @ref xxh32_family and you have a strict C90
+ * compiler.
+ */
+ #define XXH_NO_LONG_LONG
+ #undef XXH_NO_LONG_LONG /* don't actually */
+ /*!
+ * @brief Controls how unaligned memory is accessed.
+ *
+ * By default, access to unaligned memory is controlled by `memcpy()`, which
+ * is safe and portable.
+ *
+ * Unfortunately, on some target/compiler combinations, the generated
+ * assembly is sub-optimal.
+ *
+ * The below switch allow selection of a different access method
+ * in the search for improved performance.
+ *
+ * @par Possible options:
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
+ * @par
+ * Use `memcpy()`. Safe and portable. Note that most modern compilers
+ * will eliminate the function call and treat it as an unaligned access.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
+ * @par
+ * Depends on compiler extensions and is therefore not portable.
+ * This method is safe _if_ your compiler supports it,
+ * and *generally* as fast or faster than `memcpy`.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
+ * @par
+ * Casts directly and dereferences. This method doesn't depend on the
+ * compiler, but it violates the C standard as it directly dereferences
+ * an unaligned pointer. It can generate buggy code on targets which do not
+ * support unaligned memory accesses, but in some circumstances, it's
+ * the only known way to get the most performance.
+ *
+ * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
+ * @par
+ * Also portable. This can generate the best code on old compilers which
+ * don't inline small `memcpy()` calls, and it might also be faster on
+ * big-endian systems which lack a native byteswap instruction. However,
+ * some compilers will emit literal byteshifts even if the target supports
+ * unaligned access.
+ * .
+ *
+ * @warning
+ * Methods 1 and 2 rely on implementation-defined behavior. Use these with
+ * care, as what works on one compiler/platform/optimization level may
+ * cause another to read garbage data or even crash.
+ *
+ * See https://stackoverflow.com/a/32095106/646947 for details.
+ *
+ * Prefer these methods in priority order (0 > 3 > 1 > 2)
+ */
+ #define XXH_FORCE_MEMORY_ACCESS 0
+ /*!
+ * @def XXH_ACCEPT_NULL_INPUT_POINTER
+ * @brief Whether to add explicit `NULL` checks.
+ *
+ * If the input pointer is `NULL` and the length is non-zero, xxHash's
+ * default behavior is to dereference it, triggering a segfault.
+ *
+ * When this macro is enabled, xxHash actively checks the input for a null
+ * pointer. If it is, the result for null input pointers is the same as a
+ * zero-length input.
+ */
+ #define XXH_ACCEPT_NULL_INPUT_POINTER 0
+ /*!
+ * @def XXH_FORCE_ALIGN_CHECK
+ * @brief If defined to non-zero, adds a special path for aligned inputs
+ * (XXH32() and XXH64() only).
+ *
+ * This is an important performance trick for architectures without decent
+ * unaligned memory access performance.
+ *
+ * It checks for input alignment, and when conditions are met, uses a "fast
+ * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
+ * faster_ read speed.
+ *
+ * The check costs one initial branch per hash, which is generally
+ * negligible, but not zero.
+ *
+ * Moreover, it's not useful to generate an additional code path if memory
+ * access uses the same instruction for both aligned and unaligned
+ * addresses (e.g. x86 and aarch64).
+ *
+ * In these cases, the alignment check can be removed by setting this macro
+ * to 0. Then the code will always use unaligned memory access. Align check
+ * is automatically disabled on x86, x64 & arm64, which are platforms known
+ * to offer good unaligned memory accesses performance.
+ *
+ * This option does not affect XXH3 (only XXH32 and XXH64).
+ */
+ #define XXH_FORCE_ALIGN_CHECK 0
+
+ /*!
+ * @def XXH_NO_INLINE_HINTS
+ * @brief When non-zero, sets all functions to `static`.
+ *
+ * By default, xxHash tries to force the compiler to inline almost all
+ * internal functions.
+ *
+ * This can usually improve performance due to reduced jumping and improved
+ * constant folding, but significantly increases the size of the binary
+ * which might not be favorable.
+ *
+ * Additionally, sometimes the forced inlining can be detrimental to
+ * performance, depending on the architecture.
+ *
+ * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
+ * compiler full control on whether to inline or not.
+ *
+ * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
+ * -fno-inline with GCC or Clang, this will automatically be defined.
+ */
+ #define XXH_NO_INLINE_HINTS 0
+
+ /*!
+ * @def XXH_REROLL
+ * @brief Whether to reroll `XXH32_finalize`.
+ *
+ * For performance, `XXH32_finalize` uses an unrolled loop
+ * in the form of a switch statement.
+ *
+ * This is not always desirable, as it generates larger code,
+ * and depending on the architecture, may even be slower
+ *
+ * This is automatically defined with `-Os`/`-Oz` on GCC and Clang.
+ */
+ #define XXH_REROLL 0
+
+ /*!
+ * @internal
+ * @brief Redefines old internal names.
+ *
+ * For compatibility with code that uses xxHash's internals before the names
+ * were changed to improve namespacing. There is no other reason to use
+ * this.
+ */
+ #define XXH_OLD_NAMES
+ #undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+ #endif /* XXH_DOXYGEN */
+/*!
+ * @}
+ */
+
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command \
line for example */
- #if !defined(__clang__) && defined(__GNUC__) && \
- defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM_ARCH) && \
- (__ARM_ARCH == 6)
- #define XXH_FORCE_MEMORY_ACCESS 2
- #elif !defined(__clang__) && \
- ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
- (defined(__GNUC__) && (defined(__ARM_ARCH) && __ARM_ARCH >= 7)))
+ /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */
+ #if !defined(__clang__) && \
+ ((defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+ (defined(__GNUC__) && \
+ ((defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
+ (defined(__mips__) && (__mips <= 5 || __mips_isa_rev < 6) && \
+ (!defined(__mips16) || defined(__mips_mips16e2))))))
#define XXH_FORCE_MEMORY_ACCESS 1
#endif
#endif
- /*!
- * XXH_ACCEPT_NULL_INPUT_POINTER:
- * If the input pointer is NULL, xxHash's default behavior is to dereference
- * it, triggering a segfault. When this macro is enabled, xxHash actively
- * checks the input for a null pointer. If it is, the result for null input
- * pointers is the same as a zero-length input.
- */
#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
#define XXH_ACCEPT_NULL_INPUT_POINTER 0
#endif
- /*!
- * XXH_FORCE_ALIGN_CHECK:
- * This is an important performance trick
- * for architectures without decent unaligned memory access performance.
- * It checks for input alignment, and when conditions are met,
- * uses a "fast path" employing direct 32-bit/64-bit read,
- * resulting in _dramatically faster_ read speed.
- *
- * The check costs one initial branch per hash, which is generally negligible,
- * but not zero. Moreover, it's not useful to generate binary for an
- * additional code path if memory access uses same instruction for both
- * aligned and unaligned adresses.
- *
- * In these cases, the alignment check can be removed by setting this macro to
- * 0. Then the code will always use unaligned memory access. Align check is
- * automatically disabled on x86, x64 & arm64, which are platforms known to
- * offer good unaligned memory accesses performance.
- *
- * This option does not affect XXH3 (only XXH32 and XXH64).
- */
#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
#if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || \
defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) /* visual */
@@ -928,25 +1465,6 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *data, size_t len,
#endif
#endif
- /*!
- * XXH_NO_INLINE_HINTS:
- *
- * By default, xxHash tries to force the compiler to inline almost all
- * internal functions.
- *
- * This can usually improve performance due to reduced jumping and improved
- * constant folding, but significantly increases the size of the binary which
- * might not be favorable.
- *
- * Additionally, sometimes the forced inlining can be detrimental to
- * performance, depending on the architecture.
- *
- * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
- * compiler full control on whether to inline or not.
- *
- * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
- * -fno-inline with GCC or Clang, this will automatically be defined.
- */
#ifndef XXH_NO_INLINE_HINTS
#if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
|| defined(__NO_INLINE__) /* -O0, -fno-inline */
@@ -956,44 +1474,57 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *data, size_t len,
#endif
#endif
- /*!
- * XXH_REROLL:
- * Whether to reroll XXH32_finalize, and XXH64_finalize,
- * instead of using an unrolled jump table/if statement loop.
- *
- * This is automatically defined on -Os/-Oz on GCC and Clang.
- */
#ifndef XXH_REROLL
- #if defined(__OPTIMIZE_SIZE__)
+ #if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ || \
+ (defined(__GNUC__) && !defined(__clang__))
+ /* The if/then loop is preferable to switch/case on gcc (on x64) */
#define XXH_REROLL 1
#else
#define XXH_REROLL 0
#endif
#endif
+ /*!
+ * @defgroup impl Implementation
+ * @{
+
+ */
+
/* *************************************
* Includes & Memory related functions
***************************************/
- /*!
+ /*
* Modify the local functions below should you wish to use
* different memory routines for malloc() and free()
*/
#include <stdlib.h>
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than malloc().
+ */
static void *XXH_malloc(size_t s) {
return malloc(s);
}
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than free().
+ */
static void XXH_free(void *p) {
free(p);
}
- /*! and for memcpy() */
#include <string.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than memcpy().
+ */
static void *XXH_memcpy(void *dest, const void *src, size_t size) {
return memcpy(dest, src, size);
@@ -1037,7 +1568,11 @@ static void *XXH_memcpy(void *dest, const void *src, size_t size) {
/* *************************************
* Debug
***************************************/
- /*
+ /*!
+ * @ingroup tuning
+ * @def XXH_DEBUGLEVEL
+ * @brief Sets the debugging level.
+ *
* XXH_DEBUGLEVEL is expected to be defined externally, typically via the
* compiler's command line options. The value must be a number.
*/
@@ -1057,12 +1592,58 @@ static void *XXH_memcpy(void *dest, const void *src, size_t size) {
#endif
/* note: use after variable declarations */
- #define XXH_STATIC_ASSERT(c) \
- do { \
- \
- enum { XXH_sa = 1 / (int)(!!(c)) }; \
- \
- } while (0)
+ #ifndef XXH_STATIC_ASSERT
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
+ #include <assert.h>
+ #define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \
+ do { \
+ \
+ static_assert((c), m); \
+ \
+ } while (0)
+ #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
+ #define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \
+ do { \
+ \
+ static_assert((c), m); \
+ \
+ } while (0)
+ #else
+ #define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \
+ do { \
+ \
+ struct xxh_sa { \
+ \
+ char x[(c) ? 1 : -1]; \
+ \
+ }; \
+ \
+ } while (0)
+ #endif
+ #define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c), #c)
+ #endif
+
+ /*!
+ * @internal
+ * @def XXH_COMPILER_GUARD(var)
+ * @brief Used to prevent unwanted optimizations for @p var.
+ *
+ * It uses an empty GCC inline assembly statement with a register constraint
+ * which forces @p var into a general purpose register (eg eax, ebx, ecx
+ * on x86) and marks it as modified.
+ *
+ * This is used in a few places to avoid unwanted autovectorization (e.g.
+ * XXH32_round()). All vectorization we want is explicit via intrinsics,
+ * and _usually_ isn't wanted elsewhere.
+ *
+ * We also use it to prevent unwanted constant folding for AArch64 in
+ * XXH3_initCustomSecret_scalar().
+ */
+ #ifdef __GNUC__
+ #define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r"(var))
+ #else
+ #define XXH_COMPILER_GUARD(var) ((void)0)
+ #endif
/* *************************************
* Basic Types
@@ -1085,6 +1666,56 @@ typedef XXH32_hash_t xxh_u32;
/* *** Memory access *** */
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_read32(const void* ptr)
+ * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit native endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readBE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit big endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
+ * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
+ * always @ref XXH_alignment::XXH_unaligned.
+ *
+ * @param ptr The pointer to read from.
+ * @param align Whether @p ptr is aligned.
+ * @pre
+ * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
+ * aligned.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3))
/*
* Manual byteshift. Best for old compilers which don't inline memcpy.
@@ -1146,16 +1777,23 @@ static xxh_u32 XXH_read32(const void *memPtr) {
#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
-/* *** Endianess *** */
-typedef enum { XXH_bigEndian = 0, XXH_littleEndian = 1 } XXH_endianess;
+ /* *** Endianness *** */
/*!
- * XXH_CPU_LITTLE_ENDIAN:
+ * @ingroup tuning
+ * @def XXH_CPU_LITTLE_ENDIAN
+ * @brief Whether the target is little endian.
+ *
* Defined to 1 if the target is little endian, or 0 if it is big endian.
* It can be defined externally, for example on the compiler command line.
*
- * If it is not defined, a runtime check (which is usually constant folded)
- * is used instead.
+ * If it is not defined,
+ * a runtime check (which is usually constant folded) is used instead.
+ *
+ * @note
+ * This is not necessarily defined to an integer constant.
+ *
+ * @see XXH_isLittleEndian() for the runtime check.
*/
#ifndef XXH_CPU_LITTLE_ENDIAN
/*
@@ -1170,8 +1808,11 @@ typedef enum { XXH_bigEndian = 0, XXH_littleEndian = 1 } XXH_endianess;
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define XXH_CPU_LITTLE_ENDIAN 0
#else
-/*
- * runtime test, presumed to simplify to a constant by compiler
+/*!
+ * @internal
+ * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
+ *
+ * Most compilers will constant fold this.
*/
static int XXH_isLittleEndian(void) {
@@ -1189,7 +1830,7 @@ static int XXH_isLittleEndian(void) {
return one.c[0];
}
-
+\
#define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
#endif
#endif
@@ -1205,6 +1846,19 @@ static int XXH_isLittleEndian(void) {
#define XXH_HAS_BUILTIN(x) 0
#endif
+ /*!
+ * @internal
+ * @def XXH_rotl32(x,r)
+ * @brief 32-bit rotate left.
+ *
+ * @param x The 32-bit integer to be rotated.
+ * @param r The number of bits to rotate.
+ * @pre
+ * @p r > 0 && @p r < 32
+ * @note
+ * @p x and @p r may be evaluated multiple times.
+ * @return The rotated result.
+ */
#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) && \
XXH_HAS_BUILTIN(__builtin_rotateleft64)
#define XXH_rotl32 __builtin_rotateleft32
@@ -1219,6 +1873,14 @@ static int XXH_isLittleEndian(void) {
#define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif
+ /*!
+ * @internal
+ * @fn xxh_u32 XXH_swap32(xxh_u32 x)
+ * @brief A 32-bit byteswap.
+ *
+ * @param x The 32-bit integer to byteswap.
+ * @return @p x, byteswapped.
+ */
#if defined(_MSC_VER) /* Visual Studio */
#define XXH_swap32 _byteswap_ulong
#elif XXH_GCC_VERSION >= 403
@@ -1236,7 +1898,17 @@ static xxh_u32 XXH_swap32(xxh_u32 x) {
/* ***************************
* Memory reads
*****************************/
-typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+/*!
+ * @internal
+ * @brief Enum to indicate whether a pointer is aligned.
+ */
+typedef enum {
+
+ XXH_aligned, /*!< Aligned */
+ XXH_unaligned /*!< Possibly unaligned */
+
+} XXH_alignment;
/*
* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
@@ -1295,6 +1967,7 @@ XXH_FORCE_INLINE xxh_u32 XXH_readLE32_align(const void * ptr,
/* *************************************
* Misc
***************************************/
+/*! @ingroup public */
XXH_PUBLIC_API unsigned XXH_versionNumber(void) {
return XXH_VERSION_NUMBER;
@@ -1304,16 +1977,19 @@ XXH_PUBLIC_API unsigned XXH_versionNumber(void) {
/* *******************************************************************
* 32-bit hash functions
*********************************************************************/
-static const xxh_u32 XXH_PRIME32_1 =
- 0x9E3779B1U; /* 0b10011110001101110111100110110001 */
-static const xxh_u32 XXH_PRIME32_2 =
- 0x85EBCA77U; /* 0b10000101111010111100101001110111 */
-static const xxh_u32 XXH_PRIME32_3 =
- 0xC2B2AE3DU; /* 0b11000010101100101010111000111101 */
-static const xxh_u32 XXH_PRIME32_4 =
- 0x27D4EB2FU; /* 0b00100111110101001110101100101111 */
-static const xxh_u32 XXH_PRIME32_5 =
- 0x165667B1U; /* 0b00010110010101100110011110110001 */
+/*!
+ * @}
+ * @defgroup xxh32_impl XXH32 implementation
+ * @ingroup impl
+ * @{
+
+ */
+/* #define instead of static const, to be used as initializers */
+ #define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */
+ #define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */
+ #define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */
+ #define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */
+ #define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */
#ifdef XXH_OLD_NAMES
#define PRIME32_1 XXH_PRIME32_1
@@ -1323,19 +1999,29 @@ static const xxh_u32 XXH_PRIME32_5 =
#define PRIME32_5 XXH_PRIME32_5
#endif
+/*!
+ * @internal
+ * @brief Normal stripe processing routine.
+ *
+ * This shuffles the bits so that any bit from @p input impacts several bits in
+ * @p acc.
+ *
+ * @param acc The accumulator lane.
+ * @param input The stripe of input to mix.
+ * @return The mixed accumulator lane.
+ */
static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) {
acc += input * XXH_PRIME32_2;
acc = XXH_rotl32(acc, 13);
acc *= XXH_PRIME32_1;
- #if defined(__GNUC__) && defined(__SSE4_1__) && \
+ #if (defined(__SSE4_1__) || defined(__aarch64__)) && \
!defined(XXH_ENABLE_AUTOVECTORIZE)
/*
* UGLY HACK:
- * This inline assembly hack forces acc into a normal register. This is the
- * only thing that prevents GCC and Clang from autovectorizing the XXH32
- * loop (pragmas and attributes don't work for some resason) without globally
- * disabling SSE4.1.
+ * A compiler fence is the only thing that prevents GCC and Clang from
+ * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
+ * reason) without globally disabling SSE4.1.
*
* The reason we want to avoid vectorization is because despite working on
* 4 integers at a time, there are multiple factors slowing XXH32 down on
@@ -1360,28 +2046,26 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) {
* can load data, while v3 can multiply. SSE forces them to operate
* together.
*
- * How this hack works:
- * __asm__("" // Declare an assembly block but don't declare any
- * instructions : // However, as an Input/Output Operand,
- * "+r" // constrain a read/write operand (+) as a general purpose
- * register (r). (acc) // and set acc as the operand
- * );
- *
- * Because of the 'r', the compiler has promised that seed will be in a
- * general purpose register and the '+' says that it will be 'read/write',
- * so it has to assume it has changed. It is like volatile without all the
- * loads and stores.
- *
- * Since the argument has to be in a normal register (not an SSE register),
- * each time XXH32_round is called, it is impossible to vectorize.
+ * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
+ * and it is pointless writing a NEON implementation that is basically the
+ * same speed as scalar for XXH32.
*/
- __asm__("" : "+r"(acc));
+ XXH_COMPILER_GUARD(acc);
#endif
return acc;
}
-/* mix all bits */
+/*!
+ * @internal
+ * @brief Mixes all bits to finalize the hash.
+ *
+ * The final mix ensures that all input bits have a chance to impact any bit in
+ * the output digest, resulting in an unbiased distribution.
+ *
+ * @param h32 The hash to avalanche.
+ * @return The avalanched hash.
+ */
static xxh_u32 XXH32_avalanche(xxh_u32 h32) {
h32 ^= h32 >> 15;
@@ -1395,11 +2079,23 @@ static xxh_u32 XXH32_avalanche(xxh_u32 h32) {
#define XXH_get32bits(p) XXH_readLE32_align(p, align)
+/*!
+ * @internal
+ * @brief Processes the last 0-15 bytes of @p ptr.
+ *
+ * There may be up to 15 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param h32 The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 16.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ */
static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len,
XXH_alignment align) {
-
- /* dummy comment */
-
+\
#define XXH_PROCESS1 \
do { \
\
@@ -1443,20 +2139,20 @@ static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len,
case 12:
XXH_PROCESS4;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 8:
XXH_PROCESS4;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 4:
XXH_PROCESS4;
return XXH32_avalanche(h32);
case 13:
XXH_PROCESS4;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 9:
XXH_PROCESS4;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 5:
XXH_PROCESS4;
XXH_PROCESS1;
@@ -1464,10 +2160,10 @@ static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len,
case 14:
XXH_PROCESS4;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 10:
XXH_PROCESS4;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 6:
XXH_PROCESS4;
XXH_PROCESS1;
@@ -1476,22 +2172,22 @@ static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len,
case 15:
XXH_PROCESS4;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 11:
XXH_PROCESS4;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 7:
XXH_PROCESS4;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 3:
XXH_PROCESS1;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 2:
XXH_PROCESS1;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 1:
XXH_PROCESS1;
- /* fallthrough */
+ XXH_FALLTHROUGH;
case 0:
return XXH32_avalanche(h32);
@@ -1512,10 +2208,18 @@ static xxh_u32 XXH32_finalize(xxh_u32 h32, const xxh_u8 *ptr, size_t len,
#undef XXH_PROCESS4
#endif
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH32().
+ *
+ * @param input, len, seed Directly passed from @ref XXH32().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
XXH_FORCE_INLINE xxh_u32 XXH32_endian_align(const xxh_u8 *input, size_t len,
xxh_u32 seed, XXH_alignment align) {
- const xxh_u8 *bEnd = input + len;
+ const xxh_u8 *bEnd = input ? input + len : NULL;
xxh_u32 h32;
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \
@@ -1565,6 +2269,7 @@ XXH_FORCE_INLINE xxh_u32 XXH32_endian_align(const xxh_u8 *input, size_t len,
}
+/*! @ingroup xxh32_family */
XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t len,
XXH32_hash_t seed) {
@@ -1574,9 +2279,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t len,
XXH32_reset(&state, seed);
XXH32_update(&state, (const xxh_u8*)input, len);
return XXH32_digest(&state);
-
#else
-
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 3) ==
@@ -1593,13 +2296,16 @@ XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t len,
}
/******* Hash streaming *******/
-
+/*!
+ * @ingroup xxh32_family
+ */
XXH_PUBLIC_API XXH32_state_t *XXH32_createState(void) {
return (XXH32_state_t *)XXH_malloc(sizeof(XXH32_state_t));
}
+/*! @ingroup xxh32_family */
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr) {
XXH_free(statePtr);
@@ -1607,6 +2313,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr) {
}
+/*! @ingroup xxh32_family */
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t * dstState,
const XXH32_state_t *srcState) {
@@ -1614,6 +2321,7 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t * dstState,
}
+/*! @ingroup xxh32_family */
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t *statePtr,
XXH32_hash_t seed) {
@@ -1630,6 +2338,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t *statePtr,
}
+/*! @ingroup xxh32_family */
XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t *state,
const void *input, size_t len) {
@@ -1719,6 +2428,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t *state,
}
+/*! @ingroup xxh32_family */
XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *state) {
xxh_u32 h32;
@@ -1743,7 +2453,8 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *state) {
/******* Canonical representation *******/
-/*
+/*!
+ * @ingroup xxh32_family
* The default return values from XXH functions are unsigned 32 and 64 bit
* integers.
*
@@ -1765,6 +2476,7 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t *dst,
}
+/*! @ingroup xxh32_family */
XXH_PUBLIC_API XXH32_hash_t
XXH32_hashFromCanonical(const XXH32_canonical_t *src) {
@@ -1777,7 +2489,12 @@ XXH32_hashFromCanonical(const XXH32_canonical_t *src) {
/* *******************************************************************
* 64-bit hash functions
*********************************************************************/
+/*!
+ * @}
+ * @ingroup impl
+ * @{
+ */
/******* Memory access *******/
typedef XXH64_hash_t xxh_u64;
@@ -1786,40 +2503,6 @@ typedef XXH64_hash_t xxh_u64;
#define U64 xxh_u64
#endif
- /*!
- * XXH_REROLL_XXH64:
- * Whether to reroll the XXH64_finalize() loop.
- *
- * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a
- * performance gain on 64-bit hosts, as only one jump is required.
- *
- * However, on 32-bit hosts, because arithmetic needs to be done with two
- * 32-bit registers, and 64-bit arithmetic needs to be simulated, it isn't
- * beneficial to unroll. The code becomes ridiculously large (the largest
- * function in the binary on i386!), and rerolling it saves anywhere from
- * 3kB to 20kB. It is also slightly faster because it fits into cache better
- * and is more likely to be inlined by the compiler.
- *
- * If XXH_REROLL is defined, this is ignored and the loop is always
- * rerolled.
- */
- #ifndef XXH_REROLL_XXH64
- #if (defined(__ILP32__) || \
- defined(_ILP32)) /* ILP32 is often defined on 32-bit GCC family */ \
- || !(defined(__x86_64__) || defined(_M_X64) || \
- defined(_M_AMD64) /* x86-64 */ \
- || defined(_M_ARM64) || defined(__aarch64__) || \
- defined(__arm64__) /* aarch64 */ \
- || defined(__PPC64__) || defined(__PPC64LE__) || \
- defined(__ppc64__) || defined(__powerpc64__) /* ppc64 */ \
- || defined(__mips64__) || defined(__mips64)) /* mips64 */ \
- || (!defined(SIZE_MAX) || SIZE_MAX < ULLONG_MAX) /* check limits */
- #define XXH_REROLL_XXH64 1
- #else
- #define XXH_REROLL_XXH64 0
- #endif
- #endif /* !defined(XXH_REROLL_XXH64) */
-
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3))
/*
* Manual byteshift. Best for old compilers which don't inline memcpy.
@@ -1950,23 +2633,35 @@ XXH_FORCE_INLINE xxh_u64 XXH_readLE64_align(const void * ptr,
}
-/******* xxh64 *******/
+ /******* xxh64 *******/
+ /*!
+ * @}
+ * @defgroup xxh64_impl XXH64 implementation
+ * @ingroup impl
+ * @{
-static const xxh_u64 XXH_PRIME64_1 =
- 0x9E3779B185EBCA87ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111
- */
-static const xxh_u64 XXH_PRIME64_2 =
- 0xC2B2AE3D27D4EB4FULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111
- */
-static const xxh_u64 XXH_PRIME64_3 =
- 0x165667B19E3779F9ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001
- */
-static const xxh_u64 XXH_PRIME64_4 =
- 0x85EBCA77C2B2AE63ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011
- */
-static const xxh_u64 XXH_PRIME64_5 =
- 0x27D4EB2F165667C5ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101
- */
+ */
+ /* #define rather that static const, to be used as initializers */
+ #define XXH_PRIME64_1 \
+ 0x9E3779B185EBCA87ULL /*!< \
+ 0b1001111000110111011110011011000110000101111010111100101010000111 \
+ */
+ #define XXH_PRIME64_2 \
+ 0xC2B2AE3D27D4EB4FULL /*!< \
+ 0b1100001010110010101011100011110100100111110101001110101101001111 \
+ */
+ #define XXH_PRIME64_3 \
+ 0x165667B19E3779F9ULL /*!< \
+ 0b0001011001010110011001111011000110011110001101110111100111111001 \
+ */
+ #define XXH_PRIME64_4 \
+ 0x85EBCA77C2B2AE63ULL /*!< \
+ 0b1000010111101011110010100111011111000010101100101010111001100011 \
+ */
+ #define XXH_PRIME64_5 \
+ 0x27D4EB2F165667C5ULL /*!< \
+ 0b0010011111010100111010110010111100010110010101100110011111000101 \
+ */
#ifdef XXH_OLD_NAMES
#define PRIME64_1 XXH_PRIME64_1
@@ -2010,185 +2705,35 @@ static xxh_u64 XXH64_avalanche(xxh_u64 h64) {
static xxh_u64 XXH64_finalize(xxh_u64 h64, const xxh_u8 *ptr, size_t len,
XXH_alignment align) {
- /* dummy comment */
-
- #define XXH_PROCESS1_64 \
- do { \
- \
- h64 ^= (*ptr++) * XXH_PRIME64_5; \
- h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1; \
- \
- } while (0)
-
- #define XXH_PROCESS4_64 \
- do { \
- \
- h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1; \
- ptr += 4; \
- h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; \
- \
- } while (0)
-
- #define XXH_PROCESS8_64 \
- do { \
- \
- xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); \
- ptr += 8; \
- h64 ^= k1; \
- h64 = XXH_rotl64(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4; \
- \
- } while (0)
-
- /* Rerolled version for 32-bit targets is faster and much smaller. */
- if (XXH_REROLL || XXH_REROLL_XXH64) {
-
- len &= 31;
- while (len >= 8) {
-
- XXH_PROCESS8_64;
- len -= 8;
-
- }
-
- if (len >= 4) {
-
- XXH_PROCESS4_64;
- len -= 4;
-
- }
-
- while (len > 0) {
+ len &= 31;
+ while (len >= 8) {
- XXH_PROCESS1_64;
- --len;
+ xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
+ ptr += 8;
+ h64 ^= k1;
+ h64 = XXH_rotl64(h64, 27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+ len -= 8;
- }
+ }
- return XXH64_avalanche(h64);
+ if (len >= 4) {
- } else {
+ h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
+ ptr += 4;
+ h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+ len -= 4;
- switch (len & 31) {
+ }
- case 24:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 16:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 8:
- XXH_PROCESS8_64;
- return XXH64_avalanche(h64);
-
- case 28:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 20:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 12:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 4:
- XXH_PROCESS4_64;
- return XXH64_avalanche(h64);
-
- case 25:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 17:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 9:
- XXH_PROCESS8_64;
- XXH_PROCESS1_64;
- return XXH64_avalanche(h64);
-
- case 29:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 21:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 13:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 5:
- XXH_PROCESS4_64;
- XXH_PROCESS1_64;
- return XXH64_avalanche(h64);
-
- case 26:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 18:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 10:
- XXH_PROCESS8_64;
- XXH_PROCESS1_64;
- XXH_PROCESS1_64;
- return XXH64_avalanche(h64);
-
- case 30:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 22:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 14:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 6:
- XXH_PROCESS4_64;
- XXH_PROCESS1_64;
- XXH_PROCESS1_64;
- return XXH64_avalanche(h64);
-
- case 27:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 19:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 11:
- XXH_PROCESS8_64;
- XXH_PROCESS1_64;
- XXH_PROCESS1_64;
- XXH_PROCESS1_64;
- return XXH64_avalanche(h64);
-
- case 31:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 23:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 15:
- XXH_PROCESS8_64;
- /* fallthrough */
- case 7:
- XXH_PROCESS4_64;
- /* fallthrough */
- case 3:
- XXH_PROCESS1_64;
- /* fallthrough */
- case 2:
- XXH_PROCESS1_64;
- /* fallthrough */
- case 1:
- XXH_PROCESS1_64;
- /* fallthrough */
- case 0:
- return XXH64_avalanche(h64);
+ while (len > 0) {
- }
+ h64 ^= (*ptr++) * XXH_PRIME64_5;
+ h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;
+ --len;
}
- /* impossible to reach */
- XXH_ASSERT(0);
- return 0; /* unreachable, but some compilers complain without it */
+ return XXH64_avalanche(h64);
}
@@ -2205,7 +2750,7 @@ static xxh_u64 XXH64_finalize(xxh_u64 h64, const xxh_u8 *ptr, size_t len,
XXH_FORCE_INLINE xxh_u64 XXH64_endian_align(const xxh_u8 *input, size_t len,
xxh_u64 seed, XXH_alignment align) {
- const xxh_u8 *bEnd = input + len;
+ const xxh_u8 *bEnd = input ? input + len : NULL;
xxh_u64 h64;
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \
@@ -2259,6 +2804,7 @@ XXH_FORCE_INLINE xxh_u64 XXH64_endian_align(const xxh_u8 *input, size_t len,
}
+/*! @ingroup xxh64_family */
XXH_PUBLIC_API XXH64_hash_t XXH64(const void *input, size_t len,
XXH64_hash_t seed) {
@@ -2268,9 +2814,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64(const void *input, size_t len,
XXH64_reset(&state, seed);
XXH64_update(&state, (const xxh_u8*)input, len);
return XXH64_digest(&state);
-
#else
-
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 7) ==
@@ -2289,12 +2833,14 @@ XXH_PUBLIC_API XXH64_hash_t XXH64(const void *input, size_t len,
/******* Hash Streaming *******/
+/*! @ingroup xxh64_family*/
XXH_PUBLIC_API XXH64_state_t *XXH64_createState(void) {
return (XXH64_state_t *)XXH_malloc(sizeof(XXH64_state_t));
}
+/*! @ingroup xxh64_family */
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr) {
XXH_free(statePtr);
@@ -2302,6 +2848,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr) {
}
+/*! @ingroup xxh64_family */
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t * dstState,
const XXH64_state_t *srcState) {
@@ -2309,6 +2856,7 @@ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t * dstState,
}
+/*! @ingroup xxh64_family */
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t *statePtr,
XXH64_hash_t seed) {
@@ -2325,6 +2873,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t *statePtr,
}
+/*! @ingroup xxh64_family */
XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH64_state_t *state,
const void *input, size_t len) {
@@ -2403,6 +2952,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH64_state_t *state,
}
+/*! @ingroup xxh64_family */
XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t *state) {
xxh_u64 h64;
@@ -2436,6 +2986,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t *state) {
/******* Canonical representation *******/
+/*! @ingroup xxh64_family */
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t *dst,
XXH64_hash_t hash) {
@@ -2445,6 +2996,7 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t *dst,
}
+/*! @ingroup xxh64_family */
XXH_PUBLIC_API XXH64_hash_t
XXH64_hashFromCanonical(const XXH64_canonical_t *src) {
@@ -2452,380 +3004,452 @@ XXH64_hashFromCanonical(const XXH64_canonical_t *src) {
}
- /* *********************************************************************
- * XXH3
- * New generation hash designed for speed on small keys and vectorization
- ************************************************************************ */
+ #ifndef XXH_NO_XXH3
- /* === Compiler specifics === */
+ /* *********************************************************************
+ * XXH3
+ * New generation hash designed for speed on small keys and vectorization
+ ************************************************************************ */
+ /*!
+ * @}
+ * @defgroup xxh3_impl XXH3 implementation
+ * @ingroup impl
+ * @{
- #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
- #define XXH_RESTRICT restrict
- #else
- /* Note: it might be useful to define __restrict or __restrict__ for some
- * C++ compilers */
- #define XXH_RESTRICT /* disable */
- #endif
+ */
- #if (defined(__GNUC__) && (__GNUC__ >= 3)) || \
- (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || \
- defined(__clang__)
- #define XXH_likely(x) __builtin_expect(x, 1)
- #define XXH_unlikely(x) __builtin_expect(x, 0)
- #else
- #define XXH_likely(x) (x)
- #define XXH_unlikely(x) (x)
- #endif
+ /* === Compiler specifics === */
- #if defined(__GNUC__)
- #if defined(__AVX2__)
- #include <immintrin.h>
- #elif defined(__SSE2__)
- #include <emmintrin.h>
- #elif defined(__ARM_NEON__) || defined(__ARM_NEON)
- #define inline __inline__ /* circumvent a clang bug */
- #include <arm_neon.h>
- #undef inline
+ #if ((defined(sun) || defined(__sun)) && \
+ __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested \
+ with GCC 5.5 */
+ #define XXH_RESTRICT /* disable */
+ #elif defined(__STDC_VERSION__) && \
+ __STDC_VERSION__ >= 199901L /* >= C99 */
+ #define XXH_RESTRICT restrict
+ #else
+ /* Note: it might be useful to define __restrict or __restrict__ for
+ * some C++ compilers */
+ #define XXH_RESTRICT /* disable */
#endif
- #elif defined(_MSC_VER)
- #include <intrin.h>
- #endif
-
- /*
- * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
- * remaining a true 64-bit/128-bit hash function.
- *
- * This is done by prioritizing a subset of 64-bit operations that can be
- * emulated without too many steps on the average 32-bit machine.
- *
- * For example, these two lines seem similar, and run equally fast on
- * 64-bit:
- *
- * xxh_u64 x;
- * x ^= (x >> 47); // good
- * x ^= (x >> 13); // bad
- *
- * However, to a 32-bit machine, there is a major difference.
- *
- * x ^= (x >> 47) looks like this:
- *
- * x.lo ^= (x.hi >> (47 - 32));
- *
- * while x ^= (x >> 13) looks like this:
- *
- * // note: funnel shifts are not usually cheap.
- * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
- * x.hi ^= (x.hi >> 13);
- *
- * The first one is significantly faster than the second, simply because the
- * shift is larger than 32. This means:
- * - All the bits we need are in the upper 32 bits, so we can ignore the
- * lower 32 bits in the shift.
- * - The shift result will always fit in the lower 32 bits, and therefore,
- * we can ignore the upper 32 bits in the xor.
- *
- * Thanks to this optimization, XXH3 only requires these features to be
- * efficient:
- *
- * - Usable unaligned access
- * - A 32-bit or 64-bit ALU
- * - If 32-bit, a decent ADC instruction
- * - A 32 or 64-bit multiply with a 64-bit result
- * - For the 128-bit variant, a decent byteswap helps short inputs.
- *
- * The first two are already required by XXH32, and almost all 32-bit and
- * 64-bit platforms which can run XXH32 can run XXH3 efficiently.
- *
- * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
- * notable exception.
- *
- * First of all, Thumb-1 lacks support for the UMULL instruction which
- * performs the important long multiply. This means numerous __aeabi_lmul
- * calls.
- *
- * Second of all, the 8 functional registers are just not enough.
- * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic
- * need Lo registers, and this shuffling results in thousands more MOVs than
- * A32.
- *
- * A32 and T32 don't have this limitation. They can access all 14 registers,
- * do a 32->64 multiply with UMULL, and the flexible operand allowing free
- * shifts is helpful, too.
- *
- * Therefore, we do a quick sanity check.
- *
- * If compiling Thumb-1 for a target which supports ARM instructions, we
- * will emit a warning, as it is not a "sane" platform to compile for.
- *
- * Usually, if this happens, it is because of an accident and you probably
- * need to specify -march, as you likely meant to compile for a newer
- * architecture.
- *
- * Credit: large sections of the vectorial and asm source code paths
- * have been contributed by @easyaspi314
- */
- #if defined(__thumb__) && !defined(__thumb2__) && \
- defined(__ARM_ARCH_ISA_ARM)
- #warning "XXH3 is highly inefficient without ARM or Thumb-2."
- #endif
- /* ==========================================
- * Vectorization detection
- * ========================================== */
- #define XXH_SCALAR 0 /* Portable scalar version */
- #define XXH_SSE2 1 /* SSE2 for Pentium 4 and all x86_64 */
- #define XXH_AVX2 2 /* AVX2 for Haswell and Bulldozer */
- #define XXH_AVX512 3 /* AVX512 for Skylake and Icelake */
- #define XXH_NEON 4 /* NEON for most ARMv7-A and all AArch64 */
- #define XXH_VSX 5 /* VSX and ZVector for POWER8/z13 */
-
- #ifndef XXH_VECTOR /* can be defined on command line */
- #if defined(__AVX512F__)
- #define XXH_VECTOR XXH_AVX512
- #elif defined(__AVX2__)
- #define XXH_VECTOR XXH_AVX2
- #elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || \
- (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
- #define XXH_VECTOR XXH_SSE2
- #elif defined(__GNUC__) /* msvc support maybe later */ \
- && (defined(__ARM_NEON__) || defined(__ARM_NEON)) && \
- (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
- || (defined(__BYTE_ORDER__) && \
- __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
- #define XXH_VECTOR XXH_NEON
- #elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) || \
- (defined(__s390x__) && defined(__VEC__)) && \
- defined(__GNUC__) /* TODO: IBM XL */
- #define XXH_VECTOR XXH_VSX
+ #if (defined(__GNUC__) && (__GNUC__ >= 3)) || \
+ (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || \
+ defined(__clang__)
+ #define XXH_likely(x) __builtin_expect(x, 1)
+ #define XXH_unlikely(x) __builtin_expect(x, 0)
#else
- #define XXH_VECTOR XXH_SCALAR
+ #define XXH_likely(x) (x)
+ #define XXH_unlikely(x) (x)
#endif
- #endif
- /*
- * Controls the alignment of the accumulator,
- * for compatibility with aligned vector loads, which are usually faster.
- */
- #ifndef XXH_ACC_ALIGN
- #if defined(XXH_X86DISPATCH)
- #define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
- #elif XXH_VECTOR == XXH_SCALAR /* scalar */
- #define XXH_ACC_ALIGN 8
- #elif XXH_VECTOR == XXH_SSE2 /* sse2 */
- #define XXH_ACC_ALIGN 16
- #elif XXH_VECTOR == XXH_AVX2 /* avx2 */
- #define XXH_ACC_ALIGN 32
- #elif XXH_VECTOR == XXH_NEON /* neon */
- #define XXH_ACC_ALIGN 16
- #elif XXH_VECTOR == XXH_VSX /* vsx */
- #define XXH_ACC_ALIGN 16
- #elif XXH_VECTOR == XXH_AVX512 /* avx512 */
- #define XXH_ACC_ALIGN 64
+ #if defined(__GNUC__)
+ #if defined(__AVX2__)
+ #include <immintrin.h>
+ #elif defined(__SSE2__)
+ #include <emmintrin.h>
+ #elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+ #define inline __inline__ /* circumvent a clang bug */
+ #include <arm_neon.h>
+ #undef inline
+ #endif
+ #elif defined(_MSC_VER)
+ #include <intrin.h>
#endif
- #endif
- #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 || \
- XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
- #define XXH_SEC_ALIGN XXH_ACC_ALIGN
- #else
- #define XXH_SEC_ALIGN 8
- #endif
-
- /*
- * UGLY HACK:
- * GCC usually generates the best code with -O3 for xxHash.
- *
- * However, when targeting AVX2, it is overzealous in its unrolling
- * resulting in code roughly 3/4 the speed of Clang.
- *
- * There are other issues, such as GCC splitting _mm256_loadu_si256 into
- * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
- * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
- *
- * That is why when compiling the AVX2 version, it is recommended to use
- * either -O2 -mavx2 -march=haswell or -O2 -mavx2
- * -mno-avx256-split-unaligned-load for decent performance, or to use Clang
- * instead.
- *
- * Fortunately, we can control the first one with a pragma that forces GCC
- * into -O2, but the other one we can't control without "failed to inline
- * always inline function due to target mismatch" warnings.
- */
- #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
- && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
- && defined(__OPTIMIZE__) && \
- !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
- #pragma GCC push_options
- #pragma GCC optimize("-O2")
- #endif
-
- #if XXH_VECTOR == XXH_NEON
/*
- * NEON's setup for vmlal_u32 is a little more complicated than it is on
- * SSE2, AVX2, and VSX.
+ * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
+ * remaining a true 64-bit/128-bit hash function.
*
- * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an
- * upcast.
+ * This is done by prioritizing a subset of 64-bit operations that can be
+ * emulated without too many steps on the average 32-bit machine.
*
- * To do the same operation, the 128-bit 'Q' register needs to be split
- * into two 64-bit 'D' registers, performing this operation::
+ * For example, these two lines seem similar, and run equally fast on
+ * 64-bit:
*
- * [ a | b ] |
- * '---------. .--------' | | x |
- * | .---------' '--------. |
- * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32 ]
+ * xxh_u64 x;
+ * x ^= (x >> 47); // good
+ * x ^= (x >> 13); // bad
*
- * Due to significant changes in aarch64, the fastest method for aarch64
- * is completely different than the fastest method for ARMv7-A.
+ * However, to a 32-bit machine, there is a major difference.
*
- * ARMv7-A treats D registers as unions overlaying Q registers, so
- * modifying D11 will modify the high half of Q5. This is similar to how
- * modifying AH will only affect bits 8-15 of AX on x86.
+ * x ^= (x >> 47) looks like this:
*
- * VZIP takes two registers, and puts even lanes in one register and odd
- * lanes in the other.
+ * x.lo ^= (x.hi >> (47 - 32));
*
- * On ARMv7-A, this strangely modifies both parameters in place instead of
- * taking the usual 3-operand form.
+ * while x ^= (x >> 13) looks like this:
*
- * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on
- * the lower and upper halves of the Q register to end up with the high
- * and low halves where we want - all in one instruction.
+ * // note: funnel shifts are not usually cheap.
+ * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
+ * x.hi ^= (x.hi >> 13);
*
- * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = { d10[1],
- * d11[1] }
+ * The first one is significantly faster than the second, simply because
+ * the shift is larger than 32. This means:
+ * - All the bits we need are in the upper 32 bits, so we can ignore the
+ * lower 32 bits in the shift.
+ * - The shift result will always fit in the lower 32 bits, and
+ * therefore, we can ignore the upper 32 bits in the xor.
*
- * Unfortunately we need inline assembly for this: Instructions modifying
- * two registers at once is not possible in GCC or Clang's IR, and they
- * have to create a copy.
+ * Thanks to this optimization, XXH3 only requires these features to be
+ * efficient:
*
- * aarch64 requires a different approach.
+ * - Usable unaligned access
+ * - A 32-bit or 64-bit ALU
+ * - If 32-bit, a decent ADC instruction
+ * - A 32 or 64-bit multiply with a 64-bit result
+ * - For the 128-bit variant, a decent byteswap helps short inputs.
*
- * In order to make it easier to write a decent compiler for aarch64, many
- * quirks were removed, such as conditional execution.
+ * The first two are already required by XXH32, and almost all 32-bit and
+ * 64-bit platforms which can run XXH32 can run XXH3 efficiently.
*
- * NEON was also affected by this.
+ * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is
+ * one notable exception.
*
- * aarch64 cannot access the high bits of a Q-form register, and writes to
- * a D-form register zero the high bits, similar to how writes to W-form
- * scalar registers (or DWORD registers on x86_64) work.
+ * First of all, Thumb-1 lacks support for the UMULL instruction which
+ * performs the important long multiply. This means numerous __aeabi_lmul
+ * calls.
*
- * The formerly free vget_high intrinsics now require a vext (with a few
- * exceptions)
+ * Second of all, the 8 functional registers are just not enough.
+ * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic
+ * need Lo registers, and this shuffling results in thousands more MOVs
+ * than A32.
*
- * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the
- * equivalent of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to
- * only modify one operand.
+ * A32 and T32 don't have this limitation. They can access all 14
+ * registers, do a 32->64 multiply with UMULL, and the flexible operand
+ * allowing free shifts is helpful, too.
*
- * The equivalent of the VZIP.32 on the lower and upper halves would be
- * this mess:
+ * Therefore, we do a quick sanity check.
*
- * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1]
- * } zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] } zip2 v0.2s,
- * v0.2s, v1.2s // v0 = { v0[1], v2[1] }
+ * If compiling Thumb-1 for a target which supports ARM instructions, we
+ * will emit a warning, as it is not a "sane" platform to compile for.
*
- * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64
- * (SHRN):
+ * Usually, if this happens, it is because of an accident and you probably
+ * need to specify -march, as you likely meant to compile for a newer
+ * architecture.
*
- * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32);
- * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
- *
- * This is available on ARMv7-A, but is less efficient than a single
- * VZIP.32.
+ * Credit: large sections of the vectorial and asm source code paths
+ * have been contributed by @easyaspi314
*/
+ #if defined(__thumb__) && !defined(__thumb2__) && \
+ defined(__ARM_ARCH_ISA_ARM)
+ #warning "XXH3 is highly inefficient without ARM or Thumb-2."
+ #endif
+
+ /* ==========================================
+ * Vectorization detection
+ * ========================================== */
+
+ #ifdef XXH_DOXYGEN
+ /*!
+ * @ingroup tuning
+ * @brief Overrides the vectorization implementation chosen for XXH3.
+ *
+ * Can be defined to 0 to disable SIMD or any of the values mentioned in
+ * @ref XXH_VECTOR_TYPE.
+ *
+ * If this is not defined, it uses predefined macros to determine the
+ * best implementation.
+ */
+ #define XXH_VECTOR XXH_SCALAR
+/*!
+ * @ingroup tuning
+ * @brief Possible values for @ref XXH_VECTOR.
+ *
+ * Note that these are actually implemented as macros.
+ *
+ * If this is not defined, it is detected automatically.
+ * @ref XXH_X86DISPATCH overrides this.
+ */
+enum XXH_VECTOR_TYPE /* fake enum */ {
+
+ XXH_SCALAR = 0, /*!< Portable scalar version */
+ XXH_SSE2 = 1, /*!<
+ * SSE2 for Pentium 4, Opteron, all x86_64.
+ *
+ * @note SSE2 is also guaranteed on Windows 10, macOS, and
+ * Android x86.
+ */
+ XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */
+ XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */
+ XXH_NEON = 4, /*!< NEON for most ARMv7-A and all AArch64 */
+ XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
+
+};
+
+ /*!
+ * @ingroup tuning
+ * @brief Selects the minimum alignment for XXH3's accumulators.
+ *
+ * When using SIMD, this should match the alignment reqired for said
+ * vector type, so, for example, 32 for AVX2.
+ *
+ * Default: Auto detected.
+ */
+ #define XXH_ACC_ALIGN 8
+ #endif
+
+ /* Actual definition */
+ #ifndef XXH_DOXYGEN
+ #define XXH_SCALAR 0
+ #define XXH_SSE2 1
+ #define XXH_AVX2 2
+ #define XXH_AVX512 3
+ #define XXH_NEON 4
+ #define XXH_VSX 5
+ #endif
+
+ #ifndef XXH_VECTOR /* can be defined on command line */
+ #if defined(__AVX512F__)
+ #define XXH_VECTOR XXH_AVX512
+ #elif defined(__AVX2__)
+ #define XXH_VECTOR XXH_AVX2
+ #elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || \
+ (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+ #define XXH_VECTOR XXH_SSE2
+ #elif defined(__GNUC__) /* msvc support maybe later */ \
+ && (defined(__ARM_NEON__) || defined(__ARM_NEON)) && \
+ (defined( \
+ __LITTLE_ENDIAN__) /* We only support little endian NEON */ \
+ || (defined(__BYTE_ORDER__) && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+ #define XXH_VECTOR XXH_NEON
+ #elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) || \
+ (defined(__s390x__) && defined(__VEC__)) && \
+ defined(__GNUC__) /* TODO: IBM XL */
+ #define XXH_VECTOR XXH_VSX
+ #else
+ #define XXH_VECTOR XXH_SCALAR
+ #endif
+ #endif
/*
- * Function-like macro:
- * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t
- * &outHi)
- * {
-
- * outLo = (uint32x2_t)(in & 0xFFFFFFFF);
- * outHi = (uint32x2_t)(in >> 32);
- * in = UNDEFINED;
- * }
+ * Controls the alignment of the accumulator,
+ * for compatibility with aligned vector loads, which are usually faster.
*/
- #if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
- && defined(__GNUC__) && !defined(__aarch64__) && !defined(__arm64__)
- #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
- do { \
- \
- /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, \
- * %f0 = upper D half */ \
- /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 \
- */ \
- /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 \
- */ \
- __asm__("vzip.32 %e0, %f0" : "+w"(in)); \
- (outLo) = vget_low_u32(vreinterpretq_u32_u64(in)); \
- (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \
- \
- } while (0)
+ #ifndef XXH_ACC_ALIGN
+ #if defined(XXH_X86DISPATCH)
+ #define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
+ #elif XXH_VECTOR == XXH_SCALAR /* scalar */
+ #define XXH_ACC_ALIGN 8
+ #elif XXH_VECTOR == XXH_SSE2 /* sse2 */
+ #define XXH_ACC_ALIGN 16
+ #elif XXH_VECTOR == XXH_AVX2 /* avx2 */
+ #define XXH_ACC_ALIGN 32
+ #elif XXH_VECTOR == XXH_NEON /* neon */
+ #define XXH_ACC_ALIGN 16
+ #elif XXH_VECTOR == XXH_VSX /* vsx */
+ #define XXH_ACC_ALIGN 16
+ #elif XXH_VECTOR == XXH_AVX512 /* avx512 */
+ #define XXH_ACC_ALIGN 64
+ #endif
+ #endif
+ #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 || \
+ XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
+ #define XXH_SEC_ALIGN XXH_ACC_ALIGN
#else
- #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
- do { \
- \
- (outLo) = vmovn_u64(in); \
- (outHi) = vshrn_n_u64((in), 32); \
- \
- } while (0)
+ #define XXH_SEC_ALIGN 8
+ #endif
+ /*
+ * UGLY HACK:
+ * GCC usually generates the best code with -O3 for xxHash.
+ *
+ * However, when targeting AVX2, it is overzealous in its unrolling
+ * resulting in code roughly 3/4 the speed of Clang.
+ *
+ * There are other issues, such as GCC splitting _mm256_loadu_si256 into
+ * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization
+ * which only applies to Sandy and Ivy Bridge... which don't even support
+ * AVX2.
+ *
+ * That is why when compiling the AVX2 version, it is recommended to use
+ * either -O2 -mavx2 -march=haswell or -O2 -mavx2
+ * -mno-avx256-split-unaligned-load for decent performance, or to use
+ * Clang instead.
+ *
+ * Fortunately, we can control the first one with a pragma that forces GCC
+ * into -O2, but the other one we can't control without "failed to inline
+ * always inline function due to target mismatch" warnings.
+ */
+ #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+ && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+ && defined(__OPTIMIZE__) && \
+ !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+ #pragma GCC push_options
+ #pragma GCC optimize("-O2")
#endif
- #endif /* XXH_VECTOR == XXH_NEON */
- /*
- * VSX and Z Vector helpers.
- *
- * This is very messy, and any pull requests to clean this up are welcome.
- *
- * There are a lot of problems with supporting VSX and s390x, due to
- * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
- */
- #if XXH_VECTOR == XXH_VSX
- #if defined(__s390x__)
- #include <s390intrin.h>
- #else
- /* gcc's altivec.h can have the unwanted consequence to unconditionally
- * #define bool, vector, and pixel keywords,
- * with bad consequences for programs already using these keywords for
- * other purposes. The paragraph defining these macros is skipped when
- * __APPLE_ALTIVEC__ is defined.
- * __APPLE_ALTIVEC__ is _generally_ defined automatically by the
- * compiler, but it seems that, in some cases, it isn't. Force the build
- * macro to be defined, so that keywords are not altered.
+ #if XXH_VECTOR == XXH_NEON
+ /*
+ * NEON's setup for vmlal_u32 is a little more complicated than it is on
+ * SSE2, AVX2, and VSX.
+ *
+ * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an
+ * upcast.
+ *
+ * To do the same operation, the 128-bit 'Q' register needs to be split
+ * into two 64-bit 'D' registers, performing this operation::
+ *
+ * [ a | b ] |
+ * '---------. .--------' | | x |
+ * | .---------' '--------. |
+ * [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[ a >> 32 | b >> 32
+ * ]
+ *
+ * Due to significant changes in aarch64, the fastest method for aarch64
+ * is completely different than the fastest method for ARMv7-A.
+ *
+ * ARMv7-A treats D registers as unions overlaying Q registers, so
+ * modifying D11 will modify the high half of Q5. This is similar to how
+ * modifying AH will only affect bits 8-15 of AX on x86.
+ *
+ * VZIP takes two registers, and puts even lanes in one register and odd
+ * lanes in the other.
+ *
+ * On ARMv7-A, this strangely modifies both parameters in place instead
+ * of taking the usual 3-operand form.
+ *
+ * Therefore, if we want to do this, we can simply use a D-form VZIP.32
+ * on the lower and upper halves of the Q register to end up with the
+ * high and low halves where we want - all in one instruction.
+ *
+ * vzip.32 d10, d11 @ d10 = { d10[0], d11[0] }; d11 = {
+
+ * d10[1], d11[1] }
+ *
+ * Unfortunately we need inline assembly for this: Instructions
+ * modifying two registers at once is not possible in GCC or Clang's IR,
+ * and they have to create a copy.
+ *
+ * aarch64 requires a different approach.
+ *
+ * In order to make it easier to write a decent compiler for aarch64,
+ * many quirks were removed, such as conditional execution.
+ *
+ * NEON was also affected by this.
+ *
+ * aarch64 cannot access the high bits of a Q-form register, and writes
+ * to a D-form register zero the high bits, similar to how writes to
+ * W-form scalar registers (or DWORD registers on x86_64) work.
+ *
+ * The formerly free vget_high intrinsics now require a vext (with a few
+ * exceptions)
+ *
+ * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the
+ * equivalent of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to
+ * only modify one operand.
+ *
+ * The equivalent of the VZIP.32 on the lower and upper halves would be
+ * this mess:
+ *
+ * ext v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0],
+ * v0[1] } zip1 v1.2s, v0.2s, v2.2s // v1 = { v0[0], v2[0] } zip2
+ * v0.2s, v0.2s, v1.2s // v0 = { v0[1], v2[1] }
+ *
+ * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64
+ * (SHRN):
+ *
+ * shrn v1.2s, v0.2d, #32 // v1 = (uint32x2_t)(v0 >> 32);
+ * xtn v0.2s, v0.2d // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
+ *
+ * This is available on ARMv7-A, but is less efficient than a single
+ * VZIP.32.
*/
- #if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
- #define __APPLE_ALTIVEC__
- #endif
- #include <altivec.h>
- #endif
-typedef __vector unsigned long long xxh_u64x2;
-typedef __vector unsigned char xxh_u8x16;
-typedef __vector unsigned xxh_u32x4;
+ /*!
+ * Function-like macro:
+ * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t
+ * &outHi)
+ * {
- #ifndef XXH_VSX_BE
- #if defined(__BIG_ENDIAN__) || \
- (defined(__BYTE_ORDER__) && \
- __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
- #define XXH_VSX_BE 1
- #elif defined(__VEC_ELEMENT_REG_ORDER__) && \
- __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
- #warning \
- "-maltivec=be is not recommended. Please use native endianness."
- #define XXH_VSX_BE 1
+ * outLo = (uint32x2_t)(in & 0xFFFFFFFF);
+ * outHi = (uint32x2_t)(in >> 32);
+ * in = UNDEFINED;
+ * }
+ */
+ #if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
+ && defined(__GNUC__) && !defined(__aarch64__) && \
+ !defined(__arm64__)
+ #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
+ do { \
+ \
+ /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, \
+ * %f0 = upper D half */ \
+ /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 \
+ */ \
+ /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 \
+ */ \
+ __asm__("vzip.32 %e0, %f0" : "+w"(in)); \
+ (outLo) = vget_low_u32(vreinterpretq_u32_u64(in)); \
+ (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \
+ \
+ } while (0)
#else
- #define XXH_VSX_BE 0
+ #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
+ do { \
+ \
+ (outLo) = vmovn_u64(in); \
+ (outHi) = vshrn_n_u64((in), 32); \
+ \
+ } while (0)
#endif
- #endif /* !defined(XXH_VSX_BE) */
+ #endif /* XXH_VECTOR == XXH_NEON */
- #if XXH_VSX_BE
- /* A wrapper for POWER9's vec_revb. */
- #if defined(__POWER9_VECTOR__) || \
- (defined(__clang__) && defined(__s390x__))
- #define XXH_vec_revb vec_revb
+ /*
+ * VSX and Z Vector helpers.
+ *
+ * This is very messy, and any pull requests to clean this up are welcome.
+ *
+ * There are a lot of problems with supporting VSX and s390x, due to
+ * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
+ */
+ #if XXH_VECTOR == XXH_VSX
+ #if defined(__s390x__)
+ #include <s390intrin.h>
#else
+ /* gcc's altivec.h can have the unwanted consequence to
+ * unconditionally #define bool, vector, and pixel keywords, with bad
+ * consequences for programs already using these keywords for other
+ * purposes. The paragraph defining these macros is skipped when
+ * __APPLE_ALTIVEC__ is defined.
+ * __APPLE_ALTIVEC__ is _generally_ defined automatically by the
+ * compiler, but it seems that, in some cases, it isn't. Force the
+ * build macro to be defined, so that keywords are not altered.
+ */
+ #if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
+ #define __APPLE_ALTIVEC__
+ #endif
+ #include <altivec.h>
+ #endif
+
+typedef __vector unsigned long long xxh_u64x2;
+typedef __vector unsigned char xxh_u8x16;
+typedef __vector unsigned xxh_u32x4;
+
+ #ifndef XXH_VSX_BE
+ #if defined(__BIG_ENDIAN__) || \
+ (defined(__BYTE_ORDER__) && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ #define XXH_VSX_BE 1
+ #elif defined(__VEC_ELEMENT_REG_ORDER__) && \
+ __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+ #warning \
+ "-maltivec=be is not recommended. Please use native endianness."
+ #define XXH_VSX_BE 1
+ #else
+ #define XXH_VSX_BE 0
+ #endif
+ #endif /* !defined(XXH_VSX_BE) */
+
+ #if XXH_VSX_BE
+ #if defined(__POWER9_VECTOR__) || \
+ (defined(__clang__) && defined(__s390x__))
+ #define XXH_vec_revb vec_revb
+ #else
+/*!
+ * A polyfill for POWER9's vec_revb().
+ */
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) {
xxh_u8x16 const vByteSwap = {0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
@@ -2834,40 +3458,40 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) {
}
- #endif
- #endif /* XXH_VSX_BE */
+ #endif
+ #endif /* XXH_VSX_BE */
-/*
- * Performs an unaligned load and byte swaps it on big endian.
+/*!
+ * Performs an unaligned vector load and byte swaps it on big endian.
*/
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) {
xxh_u64x2 ret;
memcpy(&ret, ptr, sizeof(xxh_u64x2));
- #if XXH_VSX_BE
+ #if XXH_VSX_BE
ret = XXH_vec_revb(ret);
- #endif
+ #endif
return ret;
}
- /*
- * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
- *
- * These intrinsics weren't added until GCC 8, despite existing for a
- * while, and they are endian dependent. Also, their meaning swap
- * depending on version.
- * */
- #if defined(__s390x__)
- /* s390x is always big endian, no issue on this platform */
- #define XXH_vec_mulo vec_mulo
- #define XXH_vec_mule vec_mule
- #elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
- /* Clang has a better way to control this, we can just use the builtin
- * which doesn't swap. */
- #define XXH_vec_mulo __builtin_altivec_vmulouw
- #define XXH_vec_mule __builtin_altivec_vmuleuw
- #else
+ /*
+ * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
+ *
+ * These intrinsics weren't added until GCC 8, despite existing for a
+ * while, and they are endian dependent. Also, their meaning swap
+ * depending on version.
+ * */
+ #if defined(__s390x__)
+ /* s390x is always big endian, no issue on this platform */
+ #define XXH_vec_mulo vec_mulo
+ #define XXH_vec_mule vec_mule
+ #elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
+ /* Clang has a better way to control this, we can just use the builtin
+ * which doesn't swap. */
+ #define XXH_vec_mulo __builtin_altivec_vmulouw
+ #define XXH_vec_mule __builtin_altivec_vmuleuw
+ #else
/* gcc needs inline assembly */
/* Adapted from
* https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
@@ -2887,40 +3511,41 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) {
}
- #endif /* XXH_vec_mulo, XXH_vec_mule */
- #endif /* XXH_VECTOR == XXH_VSX */
+ #endif /* XXH_vec_mulo, XXH_vec_mule */
+ #endif /* XXH_VECTOR == XXH_VSX */
- /* prefetch
- * can be disabled, by declaring XXH_NO_PREFETCH build macro */
- #if defined(XXH_NO_PREFETCH)
- #define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
- #else
- #if defined(_MSC_VER) && \
- (defined(_M_X64) || \
- defined( \
- _M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
- #include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
- #define XXH_PREFETCH(ptr) _mm_prefetch((const char *)(ptr), _MM_HINT_T0)
- #elif defined(__GNUC__) && \
- ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)))
- #define XXH_PREFETCH(ptr) \
- __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
- #else
+ /* prefetch
+ * can be disabled, by declaring XXH_NO_PREFETCH build macro */
+ #if defined(XXH_NO_PREFETCH)
#define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
- #endif
- #endif /* XXH_NO_PREFETCH */
+ #else
+ #if defined(_MSC_VER) && \
+ (defined(_M_X64) || \
+ defined( \
+ _M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
+ #include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+ #define XXH_PREFETCH(ptr) \
+ _mm_prefetch((const char *)(ptr), _MM_HINT_T0)
+ #elif defined(__GNUC__) && \
+ ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)))
+ #define XXH_PREFETCH(ptr) \
+ __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+ #else
+ #define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
+ #endif
+ #endif /* XXH_NO_PREFETCH */
- /* ==========================================
- * XXH3 default settings
- * ========================================== */
+ /* ==========================================
+ * XXH3 default settings
+ * ========================================== */
- #define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
+ #define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
- #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
- #error "default keyset is not large enough"
- #endif
+ #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
+ #error "default keyset is not large enough"
+ #endif
-/* Pseudorandom secret taken directly from FARSH */
+/*! Pseudorandom secret taken directly from FARSH. */
XXH_ALIGN(64)
static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
@@ -2943,69 +3568,79 @@ static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
};
- #ifdef XXH_OLD_NAMES
- #define kSecret XXH3_kSecret
- #endif
+ #ifdef XXH_OLD_NAMES
+ #define kSecret XXH3_kSecret
+ #endif
- /*
- * Calculates a 32-bit to 64-bit long multiply.
- *
- * Wraps __emulu on MSVC x86 because it tends to call __allmul when it
- * doesn't need to (but it shouldn't need to anyways, it is about 7
- * instructions to do a 64x64 multiply...). Since we know that this will
- * _always_ emit MULL, we use that instead of the normal method.
- *
- * If you are compiling for platforms like Thumb-1 and don't have a better
- * option, you may also want to write your own long multiply routine here.
- *
- * XXH_FORCE_INLINE xxh_u64 XXH_mult32to64(xxh_u64 x, xxh_u64 y)
- * {
+ #ifdef XXH_DOXYGEN
+/*!
+ * @brief Calculates a 32-bit to 64-bit long multiply.
+ *
+ * Implemented as a macro.
+ *
+ * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it
+ * doesn't need to (but it shouldn't need to anyways, it is about 7 instructions
+ * to do a 64x64 multiply...). Since we know that this will _always_ emit
+ * `MULL`, we use that instead of the normal method.
+ *
+ * If you are compiling for platforms like Thumb-1 and don't have a better
+ * option, you may also want to write your own long multiply routine here.
+ *
+ * @param x, y Numbers to be multiplied
+ * @return 64-bit product of the low 32 bits of @p x and @p y.
+ */
+XXH_FORCE_INLINE xxh_u64 XXH_mult32to64(xxh_u64 x, xxh_u64 y) {
- * return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
- * }
- */
- #if defined(_MSC_VER) && defined(_M_IX86)
- #include <intrin.h>
- #define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
- #else
- /*
- * Downcast + upcast is usually better than masking on older compilers
- * like GCC 4.2 (especially 32-bit ones), all without affecting newer
- * compilers.
- *
- * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both
- * operands and perform a full 64x64 multiply -- entirely redundant on
- * 32-bit.
- */
- #define XXH_mult32to64(x, y) \
- ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
- #endif
+ return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
-/*
- * Calculates a 64->128-bit long multiply.
+}
+
+ #elif defined(_MSC_VER) && defined(_M_IX86)
+ #include <intrin.h>
+ #define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
+ #else
+ /*
+ * Downcast + upcast is usually better than masking on older compilers
+ * like GCC 4.2 (especially 32-bit ones), all without affecting newer
+ * compilers.
+ *
+ * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both
+ * operands and perform a full 64x64 multiply -- entirely redundant on
+ * 32-bit.
+ */
+ #define XXH_mult32to64(x, y) \
+ ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
+ #endif
+
+/*!
+ * @brief Calculates a 64->128-bit long multiply.
+ *
+ * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
+ * version.
*
- * Uses __uint128_t and _umul128 if available, otherwise uses a scalar version.
+ * @param lhs, rhs The 64-bit integers to be multiplied
+ * @return The 128-bit result represented in an @ref XXH128_hash_t.
*/
static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) {
- /*
- * GCC/Clang __uint128_t method.
- *
- * On most 64-bit targets, GCC and Clang define a __uint128_t type.
- * This is usually the best way as it usually uses a native long 64-bit
- * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
- *
- * Usually.
- *
- * Despite being a 32-bit platform, Clang (and emscripten) define this type
- * despite not having the arithmetic for it. This results in a laggy
- * compiler builtin call which calculates a full 128-bit multiply.
- * In that case it is best to use the portable one.
- * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
- */
- #if defined(__GNUC__) && !defined(__wasm__) && \
- defined(__SIZEOF_INT128__) || \
- (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
+ /*
+ * GCC/Clang __uint128_t method.
+ *
+ * On most 64-bit targets, GCC and Clang define a __uint128_t type.
+ * This is usually the best way as it usually uses a native long 64-bit
+ * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
+ *
+ * Usually.
+ *
+ * Despite being a 32-bit platform, Clang (and emscripten) define this
+ * type despite not having the arithmetic for it. This results in a laggy
+ * compiler builtin call which calculates a full 128-bit multiply.
+ * In that case it is best to use the portable one.
+ * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
+ */
+ #if defined(__GNUC__) && !defined(__wasm__) && \
+ defined(__SIZEOF_INT128__) || \
+ (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
__uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
XXH128_hash_t r128;
@@ -3013,19 +3648,19 @@ static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) {
r128.high64 = (xxh_u64)(product >> 64);
return r128;
- /*
- * MSVC for x64's _umul128 method.
- *
- * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64
- * *HighProduct);
- *
- * This compiles to single operand MUL on x64.
- */
- #elif defined(_M_X64) || defined(_M_IA64)
+ /*
+ * MSVC for x64's _umul128 method.
+ *
+ * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64
+ * *HighProduct);
+ *
+ * This compiles to single operand MUL on x64.
+ */
+ #elif defined(_M_X64) || defined(_M_IA64)
- #ifndef _MSC_VER
- #pragma intrinsic(_umul128)
- #endif
+ #ifndef _MSC_VER
+ #pragma intrinsic(_umul128)
+ #endif
xxh_u64 product_high;
xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
XXH128_hash_t r128;
@@ -3033,7 +3668,7 @@ static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) {
r128.high64 = product_high;
return r128;
- #else
+ #else
/*
* Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
*
@@ -3093,16 +3728,20 @@ static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) {
r128.low64 = lower;
r128.high64 = upper;
return r128;
- #endif
+ #endif
}
-/*
- * Does a 64-bit to 128-bit multiply, then XOR folds it.
+/*!
+ * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.
*
* The reason for the separate function is to prevent passing too many structs
* around by value. This will hopefully inline the multiply, but we don't force
* it.
+ *
+ * @param lhs, rhs The 64-bit integers to multiply
+ * @return The low 64 bits of the product XOR'd by the high 64 bits.
+ * @see XXH_mult64to128()
*/
static xxh_u64 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) {
@@ -3111,7 +3750,7 @@ static xxh_u64 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) {
}
-/* Seems to produce slightly better code on GCC for some reason. */
+/*! Seems to produce slightly better code on GCC for some reason. */
XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) {
XXH_ASSERT(0 <= shift && shift < 64);
@@ -3216,7 +3855,7 @@ XXH_FORCE_INLINE XXH64_hash_t XXH3_len_4to8_64b(const xxh_u8 *input, size_t len,
XXH_ASSERT(input != NULL);
XXH_ASSERT(secret != NULL);
- XXH_ASSERT(4 <= len && len < 8);
+ XXH_ASSERT(4 <= len && len <= 8);
seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
{
@@ -3239,7 +3878,7 @@ XXH_FORCE_INLINE XXH64_hash_t XXH3_len_9to16_64b(const xxh_u8 *input,
XXH_ASSERT(input != NULL);
XXH_ASSERT(secret != NULL);
- XXH_ASSERT(8 <= len && len <= 16);
+ XXH_ASSERT(9 <= len && len <= 16);
{
xxh_u64 const bitflip1 =
@@ -3306,11 +3945,10 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8 *XXH_RESTRICT input,
const xxh_u8 *XXH_RESTRICT secret,
xxh_u64 seed64) {
- #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
- && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \
- && \
- !defined( \
- XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */
+ #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+ && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \
+ && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like \
+ XXH32 hack */
/*
* UGLY HACK:
* GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
@@ -3326,8 +3964,8 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8 *XXH_RESTRICT input,
* GCC generates much better scalar code than Clang for the rest of XXH3,
* which is why finding a more optimal codepath is an interest.
*/
- __asm__("" : "+r"(seed64));
- #endif
+ XXH_COMPILER_GUARD(seed64);
+ #endif
{
xxh_u64 const input_lo = XXH_readLE64(input);
@@ -3381,7 +4019,7 @@ XXH_FORCE_INLINE XXH64_hash_t XXH3_len_17to128_64b(
}
- #define XXH3_MIDSIZE_MAX 240
+ #define XXH3_MIDSIZE_MAX 240
XXH_NO_INLINE XXH64_hash_t XXH3_len_129to240_64b(
const xxh_u8 *XXH_RESTRICT input, size_t len,
@@ -3391,8 +4029,8 @@ XXH_NO_INLINE XXH64_hash_t XXH3_len_129to240_64b(
(void)secretSize;
XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
- #define XXH3_MIDSIZE_STARTOFFSET 3
- #define XXH3_MIDSIZE_LASTOFFSET 17
+ #define XXH3_MIDSIZE_STARTOFFSET 3
+ #define XXH3_MIDSIZE_LASTOFFSET 17
{
@@ -3407,31 +4045,31 @@ XXH_NO_INLINE XXH64_hash_t XXH3_len_129to240_64b(
acc = XXH3_avalanche(acc);
XXH_ASSERT(nbRounds >= 8);
- #if defined(__clang__) /* Clang */ \
- && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
- && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
- /*
- * UGLY HACK:
- * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
- * In everywhere else, it uses scalar code.
- *
- * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
- * would still be slower than UMAAL (see XXH_mult64to128).
- *
- * Unfortunately, Clang doesn't handle the long multiplies properly and
- * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
- * scalarized into an ugly mess of VMOV.32 instructions.
- *
- * This mess is difficult to avoid without turning autovectorization
- * off completely, but they are usually relatively minor and/or not
- * worth it to fix.
- *
- * This loop is the easiest to fix, as unlike XXH32, this pragma
- * _actually works_ because it is a loop vectorization instead of an
- * SLP vectorization.
- */
- #pragma clang loop vectorize(disable)
- #endif
+ #if defined(__clang__) /* Clang */ \
+ && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+ && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
+ /*
+ * UGLY HACK:
+ * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
+ * In everywhere else, it uses scalar code.
+ *
+ * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
+ * would still be slower than UMAAL (see XXH_mult64to128).
+ *
+ * Unfortunately, Clang doesn't handle the long multiplies properly and
+ * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
+ * scalarized into an ugly mess of VMOV.32 instructions.
+ *
+ * This mess is difficult to avoid without turning autovectorization
+ * off completely, but they are usually relatively minor and/or not
+ * worth it to fix.
+ *
+ * This loop is the easiest to fix, as unlike XXH32, this pragma
+ * _actually works_ because it is a loop vectorization instead of an
+ * SLP vectorization.
+ */
+ #pragma clang loop vectorize(disable)
+ #endif
for (i = 8; i < nbRounds; i++) {
acc +=
@@ -3450,17 +4088,17 @@ XXH_NO_INLINE XXH64_hash_t XXH3_len_129to240_64b(
}
- /* ======= Long Keys ======= */
+ /* ======= Long Keys ======= */
- #define XXH_STRIPE_LEN 64
- #define XXH_SECRET_CONSUME_RATE \
- 8 /* nb of secret bytes consumed at each accumulation */
- #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
+ #define XXH_STRIPE_LEN 64
+ #define XXH_SECRET_CONSUME_RATE \
+ 8 /* nb of secret bytes consumed at each accumulation */
+ #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
- #ifdef XXH_OLD_NAMES
- #define STRIPE_LEN XXH_STRIPE_LEN
- #define ACC_NB XXH_ACC_NB
- #endif
+ #ifdef XXH_OLD_NAMES
+ #define STRIPE_LEN XXH_STRIPE_LEN
+ #define ACC_NB XXH_ACC_NB
+ #endif
XXH_FORCE_INLINE void XXH_writeLE64(void *dst, xxh_u64 v64) {
@@ -3469,56 +4107,58 @@ XXH_FORCE_INLINE void XXH_writeLE64(void *dst, xxh_u64 v64) {
}
- /* Several intrinsic functions below are supposed to accept __int64 as
- * argument, as documented in
- * https://software.intel.com/sites/landingpage/IntrinsicsGuide/ . However,
- * several environments do not define __int64 type, requiring a workaround.
- */
- #if !defined(__VMS) && \
- (defined(__cplusplus) || (defined(__STDC_VERSION__) && \
- (__STDC_VERSION__ >= 199901L) /* C99 */))
+ /* Several intrinsic functions below are supposed to accept __int64 as
+ * argument, as documented in
+ * https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
+ * However, several environments do not define __int64 type,
+ * requiring a workaround.
+ */
+ #if !defined(__VMS) && \
+ (defined(__cplusplus) || (defined(__STDC_VERSION__) && \
+ (__STDC_VERSION__ >= 199901L) /* C99 */))
typedef int64_t xxh_i64;
- #else
+ #else
/* the following type must have a width of 64-bit */
typedef long long xxh_i64;
- #endif
+ #endif
- /*
- * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the
- * most optimized.
- *
- * It is a hardened version of UMAC, based off of FARSH's implementation.
- *
- * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
- * implementations, and it is ridiculously fast.
- *
- * We harden it by mixing the original input to the accumulators as well as
- * the product.
- *
- * This means that in the (relatively likely) case of a multiply by zero, the
- * original input is preserved.
- *
- * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
- * cross-pollination, as otherwise the upper and lower halves would be
- * essentially independent.
- *
- * This doesn't matter on 64-bit hashes since they all get merged together in
- * the end, so we skip the extra step.
- *
- * Both XXH3_64bits and XXH3_128bits use this subroutine.
- */
+ /*
+ * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the
+ * most optimized.
+ *
+ * It is a hardened version of UMAC, based off of FARSH's implementation.
+ *
+ * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
+ * implementations, and it is ridiculously fast.
+ *
+ * We harden it by mixing the original input to the accumulators as well as
+ * the product.
+ *
+ * This means that in the (relatively likely) case of a multiply by zero,
+ * the original input is preserved.
+ *
+ * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
+ * cross-pollination, as otherwise the upper and lower halves would be
+ * essentially independent.
+ *
+ * This doesn't matter on 64-bit hashes since they all get merged together
+ * in the end, so we skip the extra step.
+ *
+ * Both XXH3_64bits and XXH3_128bits use this subroutine.
+ */
- #if (XXH_VECTOR == XXH_AVX512) || defined(XXH_X86DISPATCH)
+ #if (XXH_VECTOR == XXH_AVX512) || \
+ (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
- #ifndef XXH_TARGET_AVX512
- #define XXH_TARGET_AVX512 /* disable attribute target */
- #endif
+ #ifndef XXH_TARGET_AVX512
+ #define XXH_TARGET_AVX512 /* disable attribute target */
+ #endif
XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_accumulate_512_avx512(
void *XXH_RESTRICT acc, const void *XXH_RESTRICT input,
const void *XXH_RESTRICT secret) {
- XXH_ALIGN(64) __m512i *const xacc = (__m512i *)acc;
+ __m512i *const xacc = (__m512i *)acc;
XXH_ASSERT((((size_t)acc) & 63) == 0);
XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
@@ -3576,8 +4216,8 @@ XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_scrambleAcc_avx512(
XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
{
- XXH_ALIGN(64) __m512i *const xacc = (__m512i *)acc;
- const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
+ __m512i *const xacc = (__m512i *)acc;
+ const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
/* xacc[0] ^= (xacc[0] >> 47) */
__m512i const acc_vec = *xacc;
@@ -3609,19 +4249,21 @@ XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_initCustomSecret_avx512(
int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
__m512i const seed = _mm512_mask_set1_epi64(
- _mm512_set1_epi64((xxh_i64)seed64), 0xAA, -(xxh_i64)seed64);
+ _mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));
- XXH_ALIGN(64) const __m512i *const src = (const __m512i *)XXH3_kSecret;
- XXH_ALIGN(64) __m512i *const dest = (__m512i *)customSecret;
- int i;
+ const __m512i *const src = (const __m512i *)((const void *)XXH3_kSecret);
+ __m512i *const dest = (__m512i *)customSecret;
+ int i;
+ XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
+ XXH_ASSERT(((size_t)dest & 63) == 0);
for (i = 0; i < nbRounds; ++i) {
/* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void
- * const*', this will warn "discards ‘const’ qualifier". */
+ * const*', this will warn "discards 'const' qualifier". */
union {
- XXH_ALIGN(64) const __m512i *cp;
- XXH_ALIGN(64) void *p;
+ const __m512i *cp;
+ void * p;
} remote_const_void;
@@ -3635,13 +4277,14 @@ XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_initCustomSecret_avx512(
}
- #endif
+ #endif
- #if (XXH_VECTOR == XXH_AVX2) || defined(XXH_X86DISPATCH)
+ #if (XXH_VECTOR == XXH_AVX2) || \
+ (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
- #ifndef XXH_TARGET_AVX2
- #define XXH_TARGET_AVX2 /* disable attribute target */
- #endif
+ #ifndef XXH_TARGET_AVX2
+ #define XXH_TARGET_AVX2 /* disable attribute target */
+ #endif
XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_accumulate_512_avx2(
void *XXH_RESTRICT acc, const void *XXH_RESTRICT input,
@@ -3650,7 +4293,7 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_accumulate_512_avx2(
XXH_ASSERT((((size_t)acc) & 31) == 0);
{
- XXH_ALIGN(32) __m256i *const xacc = (__m256i *)acc;
+ __m256i *const xacc = (__m256i *)acc;
/* Unaligned. This is mainly for pointer arithmetic, and because
* _mm256_loadu_si256 requires a const __m256i * pointer for some reason.
*/
@@ -3692,7 +4335,7 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_scrambleAcc_avx2(
XXH_ASSERT((((size_t)acc) & 31) == 0);
{
- XXH_ALIGN(32) __m256i *const xacc = (__m256i *)acc;
+ __m256i *const xacc = (__m256i *)acc;
/* Unaligned. This is mainly for pointer arithmetic, and because
* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
const __m256i *const xsecret = (const __m256i *)secret;
@@ -3732,24 +4375,23 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(
XXH_PREFETCH(customSecret);
{
- __m256i const seed = _mm256_set_epi64x(-(xxh_i64)seed64, (xxh_i64)seed64,
- -(xxh_i64)seed64, (xxh_i64)seed64);
+ __m256i const seed =
+ _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64,
+ (xxh_i64)(0U - seed64), (xxh_i64)seed64);
- XXH_ALIGN(64) const __m256i *const src = (const __m256i *)XXH3_kSecret;
- XXH_ALIGN(64) __m256i * dest = (__m256i *)customSecret;
+ const __m256i *const src = (const __m256i *)((const void *)XXH3_kSecret);
+ __m256i * dest = (__m256i *)customSecret;
- #if defined(__GNUC__) || defined(__clang__)
+ #if defined(__GNUC__) || defined(__clang__)
/*
* On GCC & Clang, marking 'dest' as modified will cause the compiler:
* - do not extract the secret from sse registers in the internal loop
* - use less common registers, and avoid pushing these reg into stack
- * The asm hack causes Clang to assume that XXH3_kSecretPtr aliases with
- * customSecret, and on aarch64, this prevented LDP from merging two
- * loads together for free. Putting the loads together before the stores
- * properly generates LDP.
*/
- __asm__("" : "+r"(dest));
- #endif
+ XXH_COMPILER_GUARD(dest);
+ #endif
+ XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
+ XXH_ASSERT(((size_t)dest & 31) == 0);
/* GCC -O2 need unroll loop manually */
dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src + 0), seed);
@@ -3763,13 +4405,14 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(
}
- #endif
+ #endif
- #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
+ /* x86dispatch always generates SSE2 */
+ #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
- #ifndef XXH_TARGET_SSE2
- #define XXH_TARGET_SSE2 /* disable attribute target */
- #endif
+ #ifndef XXH_TARGET_SSE2
+ #define XXH_TARGET_SSE2 /* disable attribute target */
+ #endif
XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_accumulate_512_sse2(
void *XXH_RESTRICT acc, const void *XXH_RESTRICT input,
@@ -3779,7 +4422,7 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_accumulate_512_sse2(
XXH_ASSERT((((size_t)acc) & 15) == 0);
{
- XXH_ALIGN(16) __m128i *const xacc = (__m128i *)acc;
+ __m128i *const xacc = (__m128i *)acc;
/* Unaligned. This is mainly for pointer arithmetic, and because
* _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
const __m128i *const xinput = (const __m128i *)input;
@@ -3820,7 +4463,7 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_scrambleAcc_sse2(
XXH_ASSERT((((size_t)acc) & 15) == 0);
{
- XXH_ALIGN(16) __m128i *const xacc = (__m128i *)acc;
+ __m128i *const xacc = (__m128i *)acc;
/* Unaligned. This is mainly for pointer arithmetic, and because
* _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
const __m128i *const xsecret = (const __m128i *)secret;
@@ -3859,30 +4502,34 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(
int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
- #if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
- // MSVC 32bit mode does not support _mm_set_epi64x before 2015
+ #if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
+ /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
XXH_ALIGN(16)
- const xxh_i64 seed64x2[2] = {(xxh_i64)seed64, -(xxh_i64)seed64};
+ const xxh_i64 seed64x2[2] = {(xxh_i64)seed64, (xxh_i64)(0U - seed64)};
__m128i const seed = _mm_load_si128((__m128i const *)seed64x2);
- #else
- __m128i const seed = _mm_set_epi64x(-(xxh_i64)seed64, (xxh_i64)seed64);
- #endif
+ #else
+ __m128i const seed =
+ _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
+ #endif
int i;
- XXH_ALIGN(64) const float *const src = (float const *)XXH3_kSecret;
- XXH_ALIGN(XXH_SEC_ALIGN) __m128i *dest = (__m128i *)customSecret;
- #if defined(__GNUC__) || defined(__clang__)
+ const void *const src16 = XXH3_kSecret;
+ __m128i * dst16 = (__m128i *)customSecret;
+ #if defined(__GNUC__) || defined(__clang__)
/*
* On GCC & Clang, marking 'dest' as modified will cause the compiler:
* - do not extract the secret from sse registers in the internal loop
* - use less common registers, and avoid pushing these reg into stack
*/
- __asm__("" : "+r"(dest));
- #endif
+ XXH_COMPILER_GUARD(dst16);
+ #endif
+ XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
+ XXH_ASSERT(((size_t)dst16 & 15) == 0);
for (i = 0; i < nbRounds; ++i) {
- dest[i] = _mm_add_epi64(_mm_castps_si128(_mm_load_ps(src + i * 4)), seed);
+ dst16[i] =
+ _mm_add_epi64(_mm_load_si128((const __m128i *)src16 + i), seed);
}
@@ -3890,9 +4537,9 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(
}
- #endif
+ #endif
- #if (XXH_VECTOR == XXH_NEON)
+ #if (XXH_VECTOR == XXH_NEON)
XXH_FORCE_INLINE void XXH3_accumulate_512_neon(
void *XXH_RESTRICT acc, const void *XXH_RESTRICT input,
@@ -3901,7 +4548,7 @@ XXH_FORCE_INLINE void XXH3_accumulate_512_neon(
XXH_ASSERT((((size_t)acc) & 15) == 0);
{
- XXH_ALIGN(16) uint64x2_t *const xacc = (uint64x2_t *)acc;
+ uint64x2_t *const xacc = (uint64x2_t *)acc;
/* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7.
*/
uint8_t const *const xinput = (const uint8_t *)input;
@@ -3996,9 +4643,9 @@ XXH_FORCE_INLINE void XXH3_scrambleAcc_neon(void *XXH_RESTRICT acc,
}
- #endif
+ #endif
- #if (XXH_VECTOR == XXH_VSX)
+ #if (XXH_VECTOR == XXH_VSX)
XXH_FORCE_INLINE void XXH3_accumulate_512_vsx(void *XXH_RESTRICT acc,
const void *XXH_RESTRICT input,
@@ -4025,12 +4672,12 @@ XXH_FORCE_INLINE void XXH3_accumulate_512_vsx(void *XXH_RESTRICT acc,
xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
xacc[i] += product;
- /* swap high and low halves */
- #ifdef __s390x__
+ /* swap high and low halves */
+ #ifdef __s390x__
xacc[i] += vec_permi(data_vec, data_vec, 2);
- #else
+ #else
xacc[i] += vec_xxpermdi(data_vec, data_vec, 2);
- #endif
+ #endif
}
@@ -4075,7 +4722,7 @@ XXH_FORCE_INLINE void XXH3_scrambleAcc_vsx(void *XXH_RESTRICT acc,
}
- #endif
+ #endif
/* scalar variants - universal */
@@ -4083,7 +4730,6 @@ XXH_FORCE_INLINE void XXH3_accumulate_512_scalar(
void *XXH_RESTRICT acc, const void *XXH_RESTRICT input,
const void *XXH_RESTRICT secret) {
- XXH_ALIGN(XXH_ACC_ALIGN)
xxh_u64 *const xacc = (xxh_u64 *)acc; /* presumed aligned */
const xxh_u8 *const xinput =
(const xxh_u8 *)input; /* no alignment restriction */
@@ -4105,7 +4751,6 @@ XXH_FORCE_INLINE void XXH3_accumulate_512_scalar(
XXH_FORCE_INLINE void XXH3_scrambleAcc_scalar(void *XXH_RESTRICT acc,
const void *XXH_RESTRICT secret) {
- XXH_ALIGN(XXH_ACC_ALIGN)
xxh_u64 *const xacc = (xxh_u64 *)acc; /* presumed aligned */
const xxh_u8 *const xsecret =
(const xxh_u8 *)secret; /* no alignment restriction */
@@ -4135,7 +4780,7 @@ XXH_FORCE_INLINE void XXH3_initCustomSecret_scalar(
const xxh_u8 *kSecretPtr = XXH3_kSecret;
XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
- #if defined(__clang__) && defined(__aarch64__)
+ #if defined(__clang__) && defined(__aarch64__)
/*
* UGLY HACK:
* Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are
@@ -4164,8 +4809,8 @@ XXH_FORCE_INLINE void XXH3_initCustomSecret_scalar(
* without hack: 2654.4 MB/s
* with hack: 3202.9 MB/s
*/
- __asm__("" : "+r"(kSecretPtr));
- #endif
+ XXH_COMPILER_GUARD(kSecretPtr);
+ #endif
/*
* Note: in debug mode, this overrides the asm optimization
* and Clang will emit MOVK chains again.
@@ -4200,55 +4845,55 @@ typedef void (*XXH3_f_accumulate_512)(void *XXH_RESTRICT, const void *,
typedef void (*XXH3_f_scrambleAcc)(void *XXH_RESTRICT, const void *);
typedef void (*XXH3_f_initCustomSecret)(void *XXH_RESTRICT, xxh_u64);
- #if (XXH_VECTOR == XXH_AVX512)
+ #if (XXH_VECTOR == XXH_AVX512)
- #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
- #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
- #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
+ #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
+ #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
+ #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
- #elif (XXH_VECTOR == XXH_AVX2)
+ #elif (XXH_VECTOR == XXH_AVX2)
- #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
- #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
- #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
+ #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
+ #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
+ #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
- #elif (XXH_VECTOR == XXH_SSE2)
+ #elif (XXH_VECTOR == XXH_SSE2)
- #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
- #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
- #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
+ #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
+ #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
+ #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
- #elif (XXH_VECTOR == XXH_NEON)
+ #elif (XXH_VECTOR == XXH_NEON)
- #define XXH3_accumulate_512 XXH3_accumulate_512_neon
- #define XXH3_scrambleAcc XXH3_scrambleAcc_neon
- #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+ #define XXH3_accumulate_512 XXH3_accumulate_512_neon
+ #define XXH3_scrambleAcc XXH3_scrambleAcc_neon
+ #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
- #elif (XXH_VECTOR == XXH_VSX)
+ #elif (XXH_VECTOR == XXH_VSX)
- #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
- #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
- #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+ #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
+ #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
+ #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
- #else /* scalar */
+ #else /* scalar */
- #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
- #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
- #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+ #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
+ #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
+ #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
- #endif
+ #endif
- #ifndef XXH_PREFETCH_DIST
- #ifdef __clang__
- #define XXH_PREFETCH_DIST 320
- #else
- #if (XXH_VECTOR == XXH_AVX512)
- #define XXH_PREFETCH_DIST 512
+ #ifndef XXH_PREFETCH_DIST
+ #ifdef __clang__
+ #define XXH_PREFETCH_DIST 320
#else
- #define XXH_PREFETCH_DIST 384
- #endif
- #endif /* __clang__ */
- #endif /* XXH_PREFETCH_DIST */
+ #if (XXH_VECTOR == XXH_AVX512)
+ #define XXH_PREFETCH_DIST 512
+ #else
+ #define XXH_PREFETCH_DIST 384
+ #endif
+ #endif /* __clang__ */
+ #endif /* XXH_PREFETCH_DIST */
/*
* XXH3_accumulate()
@@ -4308,8 +4953,9 @@ XXH_FORCE_INLINE void XXH3_hashLong_internal_loop(
{
const xxh_u8 *const p = input + len - XXH_STRIPE_LEN;
- #define XXH_SECRET_LASTACC_START \
- 7 /* not aligned on 8, last secret is different from acc & scrambler */
+ #define XXH_SECRET_LASTACC_START \
+ 7 /* not aligned on 8, last secret is different from acc & scrambler \
+ */
f_acc512(acc, p,
secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
@@ -4337,10 +4983,10 @@ static XXH64_hash_t XXH3_mergeAccs(const xxh_u64 *XXH_RESTRICT acc,
for (i = 0; i < 4; i++) {
result64 += XXH3_mix2Accs(acc + 2 * i, secret + 16 * i);
- #if defined(__clang__) /* Clang */ \
- && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \
- && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
- && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
+ #if defined(__clang__) /* Clang */ \
+ && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \
+ && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+ && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
/*
* UGLY HACK:
* Prevent autovectorization on Clang ARMv7-a. Exact same problem as
@@ -4349,8 +4995,8 @@ static XXH64_hash_t XXH3_mergeAccs(const xxh_u64 *XXH_RESTRICT acc,
* without hack: 2063.7 MB/s
* with hack: 2560.7 MB/s
*/
- __asm__("" : "+r"(result64));
- #endif
+ XXH_COMPILER_GUARD(result64);
+ #endif
}
@@ -4358,13 +5004,13 @@ static XXH64_hash_t XXH3_mergeAccs(const xxh_u64 *XXH_RESTRICT acc,
}
- #define XXH3_INIT_ACC \
- { \
- \
- XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
- XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 \
- \
- }
+ #define XXH3_INIT_ACC \
+ { \
+ \
+ XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
+ XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 \
+ \
+ }
XXH_FORCE_INLINE XXH64_hash_t XXH3_hashLong_64b_internal(
const void *XXH_RESTRICT input, size_t len, const void *XXH_RESTRICT secret,
@@ -4379,9 +5025,9 @@ XXH_FORCE_INLINE XXH64_hash_t XXH3_hashLong_64b_internal(
/* converge into final hash */
XXH_STATIC_ASSERT(sizeof(acc) == 64);
- /* do not align on 8, so that the secret is different from the accumulator
- */
- #define XXH_SECRET_MERGEACCS_START 11
+ /* do not align on 8, so that the secret is different from the accumulator
+ */
+ #define XXH_SECRET_MERGEACCS_START 11
XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
return XXH3_mergeAccs(acc,
(const xxh_u8 *)secret + XXH_SECRET_MERGEACCS_START,
@@ -4501,6 +5147,7 @@ XXH3_64bits_internal(const void *XXH_RESTRICT input, size_t len,
/* === Public entry point === */
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void *input, size_t len) {
return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret),
@@ -4508,6 +5155,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void *input, size_t len) {
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void *input,
size_t len,
const void *secret,
@@ -4518,6 +5166,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void *input,
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void *input, size_t len,
XXH64_hash_t seed) {
@@ -4603,6 +5252,7 @@ static void XXH_alignedFree(void *p) {
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH3_state_t *XXH3_createState(void) {
XXH3_state_t *const state =
@@ -4613,6 +5263,7 @@ XXH_PUBLIC_API XXH3_state_t *XXH3_createState(void) {
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t *statePtr) {
XXH_alignedFree(statePtr);
@@ -4620,6 +5271,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t *statePtr) {
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t * dst_state,
const XXH3_state_t *src_state) {
@@ -4627,9 +5279,8 @@ XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t * dst_state,
}
-static void XXH3_64bits_reset_internal(XXH3_state_t *statePtr,
- XXH64_hash_t seed, const void *secret,
- size_t secretSize) {
+static void XXH3_reset_internal(XXH3_state_t *statePtr, XXH64_hash_t seed,
+ const void *secret, size_t secretSize) {
size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
size_t const initLength =
@@ -4654,26 +5305,28 @@ static void XXH3_64bits_reset_internal(XXH3_state_t *statePtr,
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t *statePtr) {
if (statePtr == NULL) return XXH_ERROR;
- XXH3_64bits_reset_internal(statePtr, 0, XXH3_kSecret,
- XXH_SECRET_DEFAULT_SIZE);
+ XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
return XXH_OK;
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(
XXH3_state_t *statePtr, const void *secret, size_t secretSize) {
if (statePtr == NULL) return XXH_ERROR;
- XXH3_64bits_reset_internal(statePtr, 0, secret, secretSize);
+ XXH3_reset_internal(statePtr, 0, secret, secretSize);
if (secret == NULL) return XXH_ERROR;
if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
return XXH_OK;
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t *statePtr,
XXH64_hash_t seed) {
@@ -4681,7 +5334,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t *statePtr,
if (seed == 0) return XXH3_64bits_reset(statePtr);
if (seed != statePtr->seed)
XXH3_initCustomSecret(statePtr->customSecret, seed);
- XXH3_64bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
+ XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
return XXH_OK;
}
@@ -4733,12 +5386,12 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state,
XXH3_f_scrambleAcc f_scramble) {
if (input == NULL)
- #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \
- (XXH_ACCEPT_NULL_INPUT_POINTER >= 1)
+ #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && \
+ (XXH_ACCEPT_NULL_INPUT_POINTER >= 1)
return XXH_OK;
- #else
+ #else
return XXH_ERROR;
- #endif
+ #endif
{
@@ -4747,6 +5400,7 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state,
(state->extSecret == NULL) ? state->customSecret : state->extSecret;
state->totalLen += len;
+ XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
if (state->bufferedSize + len <=
XXH3_INTERNALBUFFER_SIZE) { /* fill in tmp buffer */
@@ -4756,10 +5410,10 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state,
}
- /* total input is now > XXH3_INTERNALBUFFER_SIZE */
+ /* total input is now > XXH3_INTERNALBUFFER_SIZE */
- #define XXH3_INTERNALBUFFER_STRIPES \
- (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
+ #define XXH3_INTERNALBUFFER_STRIPES \
+ (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN ==
0); /* clean multiple */
@@ -4783,7 +5437,7 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state,
XXH_ASSERT(input < bEnd);
/* Consume input by a multiple of internal buffer size */
- if (input + XXH3_INTERNALBUFFER_SIZE < bEnd) {
+ if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
const xxh_u8 *const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
do {
@@ -4814,6 +5468,7 @@ XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *state,
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH3_state_t *state,
const void *input, size_t len) {
@@ -4859,6 +5514,7 @@ XXH_FORCE_INLINE void XXH3_digest_long(XXH64_hash_t * acc,
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(const XXH3_state_t *state) {
const unsigned char *const secret =
@@ -4881,8 +5537,9 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(const XXH3_state_t *state) {
}
- #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+ #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API void XXH3_generateSecret(void * secretBuffer,
const void *customSeed,
size_t customSeedSize) {
@@ -5398,6 +6055,7 @@ XXH3_128bits_internal(const void *input, size_t len, XXH64_hash_t seed64,
/* === Public XXH128 API === */
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void *input, size_t len) {
return XXH3_128bits_internal(input, len, 0, XXH3_kSecret,
@@ -5406,6 +6064,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void *input, size_t len) {
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void *input,
size_t len,
const void *secret,
@@ -5416,6 +6075,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void *input,
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void * input,
size_t len,
XXH64_hash_t seed) {
@@ -5426,6 +6086,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void * input,
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH128_hash_t XXH128(const void *input, size_t len,
XXH64_hash_t seed) {
@@ -5437,37 +6098,31 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void *input, size_t len,
/*
* All the functions are actually the same as for 64-bit streaming variant.
- * The only difference is the finalizatiom routine.
+ * The only difference is the finalization routine.
*/
-static void XXH3_128bits_reset_internal(XXH3_state_t *statePtr,
- XXH64_hash_t seed, const void *secret,
- size_t secretSize) {
-
- XXH3_64bits_reset_internal(statePtr, seed, secret, secretSize);
-
-}
-
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t *statePtr) {
if (statePtr == NULL) return XXH_ERROR;
- XXH3_128bits_reset_internal(statePtr, 0, XXH3_kSecret,
- XXH_SECRET_DEFAULT_SIZE);
+ XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
return XXH_OK;
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(
XXH3_state_t *statePtr, const void *secret, size_t secretSize) {
if (statePtr == NULL) return XXH_ERROR;
- XXH3_128bits_reset_internal(statePtr, 0, secret, secretSize);
+ XXH3_reset_internal(statePtr, 0, secret, secretSize);
if (secret == NULL) return XXH_ERROR;
if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
return XXH_OK;
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t *statePtr,
XXH64_hash_t seed) {
@@ -5475,11 +6130,12 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t *statePtr,
if (seed == 0) return XXH3_128bits_reset(statePtr);
if (seed != statePtr->seed)
XXH3_initCustomSecret(statePtr->customSecret, seed);
- XXH3_128bits_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
+ XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
return XXH_OK;
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH3_state_t *state,
const void * input,
size_t len) {
@@ -5489,6 +6145,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH3_state_t *state,
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(const XXH3_state_t *state) {
const unsigned char *const secret =
@@ -5524,11 +6181,12 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(const XXH3_state_t *state) {
}
- /* 128-bit utility functions */
+ /* 128-bit utility functions */
- #include <string.h> /* memcmp, memcpy */
+ #include <string.h> /* memcmp, memcpy */
/* return : 1 is equal, 0 if different */
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) {
/* note : XXH128_hash_t is compact, it has no padding byte */
@@ -5540,6 +6198,7 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) {
* return : >0 if *h128_1 > *h128_2
* <0 if *h128_1 < *h128_2
* =0 if *h128_1 == *h128_2 */
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API int XXH128_cmp(const void *h128_1, const void *h128_2) {
XXH128_hash_t const h1 = *(const XXH128_hash_t *)h128_1;
@@ -5552,6 +6211,7 @@ XXH_PUBLIC_API int XXH128_cmp(const void *h128_1, const void *h128_2) {
}
/*====== Canonical representation ======*/
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t *dst,
XXH128_hash_t hash) {
@@ -5568,6 +6228,7 @@ XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t *dst,
}
+/*! @ingroup xxh3_family */
XXH_PUBLIC_API XXH128_hash_t
XXH128_hashFromCanonical(const XXH128_canonical_t *src) {
@@ -5578,16 +6239,21 @@ XXH128_hashFromCanonical(const XXH128_canonical_t *src) {
}
- /* Pop our optimization override from above */
- #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
- && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
- && defined(__OPTIMIZE__) && \
- !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
- #pragma GCC pop_options
- #endif
+ /* Pop our optimization override from above */
+ #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
+ && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+ && defined(__OPTIMIZE__) && \
+ !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+ #pragma GCC pop_options
+ #endif
- #endif /* XXH_NO_LONG_LONG */
+ #endif /* XXH_NO_LONG_LONG */
+
+ #endif /* XXH_NO_XXH3 */
+/*!
+ * @}
+ */
#endif /* XXH_IMPLEMENTATION */
#if defined(__cplusplus)
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index b2802a29..91c690c0 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -22,6 +22,10 @@
#include "cmplog.h"
#include "llvm-alternative-coverage.h"
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+#undef XXH_INLINE_ALL
+
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
@@ -154,6 +158,8 @@ static void at_exit(int signal) {
}
+#define default_hash(a, b) XXH3_64bits(a, b)
+
/* Uninspired gcc plugin instrumentation */
void __afl_trace(const u32 x) {
@@ -669,7 +675,7 @@ static void __afl_start_snapshots(void) {
/* Phone home and tell the parent that we're OK. If parent isn't there,
assume we're not running in forkserver mode and just execute program. */
- status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT);
+ status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT | FS_OPT_NEWCMPLOG);
if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ;
if (__afl_map_size <= FS_OPT_MAX_MAPSIZE)
status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE);
@@ -935,7 +941,12 @@ static void __afl_start_forkserver(void) {
}
if (__afl_sharedmem_fuzzing != 0) { status_for_fsrv |= FS_OPT_SHDMEM_FUZZ; }
- if (status_for_fsrv) { status_for_fsrv |= (FS_OPT_ENABLED); }
+ if (status_for_fsrv) {
+
+ status_for_fsrv |= (FS_OPT_ENABLED | FS_OPT_NEWCMPLOG);
+
+ }
+
memcpy(tmp, &status_for_fsrv, 4);
/* Phone home and tell the parent that we're OK. If parent isn't there,
@@ -1499,8 +1510,7 @@ void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2, uint8_t attr) {
if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
uintptr_t k = (uintptr_t)__builtin_return_address(0);
- k = (k >> 4) ^ (k << 8);
- k &= CMP_MAP_W - 1;
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
u32 hits;
@@ -1530,8 +1540,7 @@ void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2, uint8_t attr) {
if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
uintptr_t k = (uintptr_t)__builtin_return_address(0);
- k = (k >> 4) ^ (k << 8);
- k &= CMP_MAP_W - 1;
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
u32 hits;
@@ -1569,8 +1578,7 @@ void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2, uint8_t attr) {
if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
uintptr_t k = (uintptr_t)__builtin_return_address(0);
- k = (k >> 4) ^ (k << 8);
- k &= CMP_MAP_W - 1;
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
u32 hits;
@@ -1608,8 +1616,7 @@ void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2, uint8_t attr) {
if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
uintptr_t k = (uintptr_t)__builtin_return_address(0);
- k = (k >> 4) ^ (k << 8);
- k &= CMP_MAP_W - 1;
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
u32 hits;
@@ -1652,8 +1659,7 @@ void __cmplog_ins_hookN(uint128_t arg1, uint128_t arg2, uint8_t attr,
if (unlikely(!__afl_cmp_map || arg1 == arg2)) return;
uintptr_t k = (uintptr_t)__builtin_return_address(0);
- k = (k >> 4) ^ (k << 8);
- k &= CMP_MAP_W - 1;
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
u32 hits;
@@ -1696,8 +1702,7 @@ void __cmplog_ins_hook16(uint128_t arg1, uint128_t arg2, uint8_t attr) {
if (likely(!__afl_cmp_map)) return;
uintptr_t k = (uintptr_t)__builtin_return_address(0);
- k = (k >> 4) ^ (k << 8);
- k &= CMP_MAP_W - 1;
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
u32 hits;
@@ -1802,8 +1807,8 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
for (uint64_t i = 0; i < cases[0]; i++) {
uintptr_t k = (uintptr_t)__builtin_return_address(0) + i;
- k = (k >> 4) ^ (k << 8);
- k &= CMP_MAP_W - 1;
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) &
+ (CMP_MAP_W - 1));
u32 hits;
@@ -1880,6 +1885,159 @@ static int area_is_valid(void *ptr, size_t len) {
}
+void __cmplog_rtn_hook_n(u8 *ptr1, u8 *ptr2, u64 len) {
+
+ /*
+ u32 i;
+ if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return;
+ fprintf(stderr, "rtn_n len=%u arg0=", len);
+ for (i = 0; i < len; i++)
+ fprintf(stderr, "%02x", ptr1[i]);
+ fprintf(stderr, " arg1=");
+ for (i = 0; i < len; i++)
+ fprintf(stderr, "%02x", ptr2[i]);
+ fprintf(stderr, "\n");
+ */
+
+ if (likely(!__afl_cmp_map)) return;
+ // fprintf(stderr, "RTN1 %p %p %u\n", ptr1, ptr2, len);
+ if (unlikely(!len)) return;
+ int l = MIN(31, len);
+
+ // fprintf(stderr, "RTN2 %u\n", l);
+ uintptr_t k = (uintptr_t)__builtin_return_address(0);
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
+
+ u32 hits;
+
+ if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+ __afl_cmp_map->headers[k].hits = 1;
+ __afl_cmp_map->headers[k].shape = l - 1;
+ hits = 0;
+
+ } else {
+
+ hits = __afl_cmp_map->headers[k].hits++;
+
+ if (__afl_cmp_map->headers[k].shape < l) {
+
+ __afl_cmp_map->headers[k].shape = l - 1;
+
+ }
+
+ }
+
+ struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k];
+ hits &= CMP_MAP_RTN_H - 1;
+
+ cmpfn[hits].v0_len = l;
+ cmpfn[hits].v1_len = l;
+ __builtin_memcpy(cmpfn[hits].v0, ptr1, l);
+ __builtin_memcpy(cmpfn[hits].v1, ptr2, l);
+ // fprintf(stderr, "RTN3\n");
+
+}
+
+void __cmplog_rtn_hook_strn(u8 *ptr1, u8 *ptr2, u64 len) {
+
+ /*
+ if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return;
+ fprintf(stderr, "rtn_strn len=%u arg0=%s arg1=%s\n", len, ptr1, ptr2);
+ */
+
+ if (likely(!__afl_cmp_map)) return;
+ // fprintf(stderr, "RTN1 %p %p %u\n", ptr1, ptr2, len);
+ if (unlikely(!len)) return;
+ int l = MIN(31, len + 1);
+
+ // fprintf(stderr, "RTN2 %u\n", l);
+ uintptr_t k = (uintptr_t)__builtin_return_address(0);
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
+
+ u32 hits;
+
+ if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+ __afl_cmp_map->headers[k].hits = 1;
+ __afl_cmp_map->headers[k].shape = l - 1;
+ hits = 0;
+
+ } else {
+
+ hits = __afl_cmp_map->headers[k].hits++;
+
+ if (__afl_cmp_map->headers[k].shape < l) {
+
+ __afl_cmp_map->headers[k].shape = l - 1;
+
+ }
+
+ }
+
+ struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k];
+ hits &= CMP_MAP_RTN_H - 1;
+
+ cmpfn[hits].v0_len = 0x80 + l;
+ cmpfn[hits].v1_len = 0x80 + l;
+ __builtin_memcpy(cmpfn[hits].v0, ptr1, l);
+ __builtin_memcpy(cmpfn[hits].v1, ptr2, l);
+ // fprintf(stderr, "RTN3\n");
+
+}
+
+void __cmplog_rtn_hook_str(u8 *ptr1, u8 *ptr2) {
+
+ /*
+ if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return;
+ fprintf(stderr, "rtn_str arg0=%s arg1=%s\n", ptr1, ptr2);
+ */
+
+ if (likely(!__afl_cmp_map)) return;
+ // fprintf(stderr, "RTN1 %p %p\n", ptr1, ptr2);
+ if (unlikely(!ptr1 || !ptr2)) return;
+ int len1 = MIN(31, strlen(ptr1) + 1);
+ int len2 = MIN(31, strlen(ptr2) + 1);
+ int l = MIN(MAX(len1, len2), 31);
+
+ // fprintf(stderr, "RTN2 %u\n", l);
+ uintptr_t k = (uintptr_t)__builtin_return_address(0);
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
+
+ u32 hits;
+
+ if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+ __afl_cmp_map->headers[k].hits = 1;
+ __afl_cmp_map->headers[k].shape = l - 1;
+ hits = 0;
+
+ } else {
+
+ hits = __afl_cmp_map->headers[k].hits++;
+
+ if (__afl_cmp_map->headers[k].shape < l) {
+
+ __afl_cmp_map->headers[k].shape = l - 1;
+
+ }
+
+ }
+
+ struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k];
+ hits &= CMP_MAP_RTN_H - 1;
+
+ cmpfn[hits].v0_len = 0x80 + len1;
+ cmpfn[hits].v1_len = 0x80 + len2;
+ __builtin_memcpy(cmpfn[hits].v0, ptr1, len1);
+ __builtin_memcpy(cmpfn[hits].v1, ptr2, len2);
+ // fprintf(stderr, "RTN3\n");
+
+}
+
void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
/*
@@ -1900,12 +2058,11 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
if ((l1 = area_is_valid(ptr1, 32)) <= 0 ||
(l2 = area_is_valid(ptr2, 32)) <= 0)
return;
- int len = MIN(l1, l2);
+ int len = MIN(31, MIN(l1, l2));
// fprintf(stderr, "RTN2 %u\n", len);
uintptr_t k = (uintptr_t)__builtin_return_address(0);
- k = (k >> 4) ^ (k << 8);
- k &= CMP_MAP_W - 1;
+ k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1));
u32 hits;
@@ -1928,11 +2085,13 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
}
+ struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k];
hits &= CMP_MAP_RTN_H - 1;
- __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0,
- ptr1, len);
- __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1,
- ptr2, len);
+
+ cmpfn[hits].v0_len = len;
+ cmpfn[hits].v1_len = len;
+ __builtin_memcpy(cmpfn[hits].v0, ptr1, len);
+ __builtin_memcpy(cmpfn[hits].v1, ptr2, len);
// fprintf(stderr, "RTN3\n");
}
diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc
index 01b7a373..fb514edc 100644
--- a/instrumentation/cmplog-routines-pass.cc
+++ b/instrumentation/cmplog-routines-pass.cc
@@ -87,12 +87,14 @@ char CmpLogRoutines::ID = 0;
bool CmpLogRoutines::hookRtns(Module &M) {
- std::vector<CallInst *> calls, llvmStdStd, llvmStdC, gccStdStd, gccStdC;
- LLVMContext & C = M.getContext();
+ std::vector<CallInst *> calls, llvmStdStd, llvmStdC, gccStdStd, gccStdC,
+ Memcmp, Strcmp, Strncmp;
+ LLVMContext &C = M.getContext();
Type *VoidTy = Type::getVoidTy(C);
// PointerType *VoidPtrTy = PointerType::get(VoidTy, 0);
IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
+ IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
PointerType *i8PtrTy = PointerType::get(Int8Ty, 0);
#if LLVM_VERSION_MAJOR < 9
@@ -184,6 +186,60 @@ bool CmpLogRoutines::hookRtns(Module &M) {
FunctionCallee cmplogGccStdC = c4;
#endif
+#if LLVM_VERSION_MAJOR < 9
+ Constant *
+#else
+ FunctionCallee
+#endif
+ c5 = M.getOrInsertFunction("__cmplog_rtn_hook_n", VoidTy, i8PtrTy,
+ i8PtrTy, Int64Ty
+#if LLVM_VERSION_MAJOR < 5
+ ,
+ NULL
+#endif
+ );
+#if LLVM_VERSION_MAJOR < 9
+ Function *cmplogHookFnN = cast<Function>(c5);
+#else
+ FunctionCallee cmplogHookFnN = c5;
+#endif
+
+#if LLVM_VERSION_MAJOR < 9
+ Constant *
+#else
+ FunctionCallee
+#endif
+ c6 = M.getOrInsertFunction("__cmplog_rtn_hook_strn", VoidTy, i8PtrTy,
+ i8PtrTy, Int64Ty
+#if LLVM_VERSION_MAJOR < 5
+ ,
+ NULL
+#endif
+ );
+#if LLVM_VERSION_MAJOR < 9
+ Function *cmplogHookFnStrN = cast<Function>(c6);
+#else
+ FunctionCallee cmplogHookFnStrN = c6;
+#endif
+
+#if LLVM_VERSION_MAJOR < 9
+ Constant *
+#else
+ FunctionCallee
+#endif
+ c7 = M.getOrInsertFunction("__cmplog_rtn_hook_str", VoidTy, i8PtrTy,
+ i8PtrTy
+#if LLVM_VERSION_MAJOR < 5
+ ,
+ NULL
+#endif
+ );
+#if LLVM_VERSION_MAJOR < 9
+ Function *cmplogHookFnStr = cast<Function>(c7);
+#else
+ FunctionCallee cmplogHookFnStr = c7;
+#endif
+
GlobalVariable *AFLCmplogPtr = M.getNamedGlobal("__afl_cmp_map");
if (!AFLCmplogPtr) {
@@ -214,12 +270,93 @@ bool CmpLogRoutines::hookRtns(Module &M) {
if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
FunctionType *FT = Callee->getFunctionType();
+ std::string FuncName = Callee->getName().str();
bool isPtrRtn = FT->getNumParams() >= 2 &&
!FT->getReturnType()->isVoidTy() &&
FT->getParamType(0) == FT->getParamType(1) &&
FT->getParamType(0)->isPointerTy();
+ bool isPtrRtnN = FT->getNumParams() >= 3 &&
+ !FT->getReturnType()->isVoidTy() &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0)->isPointerTy() &&
+ FT->getParamType(2)->isIntegerTy();
+ if (isPtrRtnN) {
+
+ auto intTyOp =
+ dyn_cast<IntegerType>(callInst->getArgOperand(2)->getType());
+ if (intTyOp) {
+
+ if (intTyOp->getBitWidth() != 32 &&
+ intTyOp->getBitWidth() != 64) {
+
+ isPtrRtnN = false;
+
+ }
+
+ }
+
+ }
+
+ bool isMemcmp =
+ (!FuncName.compare("memcmp") || !FuncName.compare("bcmp") ||
+ !FuncName.compare("CRYPTO_memcmp") ||
+ !FuncName.compare("OPENSSL_memcmp") ||
+ !FuncName.compare("memcmp_const_time") ||
+ !FuncName.compare("memcmpct"));
+ isMemcmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0)->isPointerTy() &&
+ FT->getParamType(1)->isPointerTy() &&
+ FT->getParamType(2)->isIntegerTy();
+
+ bool isStrcmp =
+ (!FuncName.compare("strcmp") || !FuncName.compare("xmlStrcmp") ||
+ !FuncName.compare("xmlStrEqual") ||
+ !FuncName.compare("g_strcmp0") ||
+ !FuncName.compare("curl_strequal") ||
+ !FuncName.compare("strcsequal") ||
+ !FuncName.compare("strcasecmp") ||
+ !FuncName.compare("stricmp") ||
+ !FuncName.compare("ap_cstr_casecmp") ||
+ !FuncName.compare("OPENSSL_strcasecmp") ||
+ !FuncName.compare("xmlStrcasecmp") ||
+ !FuncName.compare("g_strcasecmp") ||
+ !FuncName.compare("g_ascii_strcasecmp") ||
+ !FuncName.compare("Curl_strcasecompare") ||
+ !FuncName.compare("Curl_safe_strcasecompare") ||
+ !FuncName.compare("cmsstrcasecmp") ||
+ !FuncName.compare("strstr") ||
+ !FuncName.compare("g_strstr_len") ||
+ !FuncName.compare("ap_strcasestr") ||
+ !FuncName.compare("xmlStrstr") ||
+ !FuncName.compare("xmlStrcasestr") ||
+ !FuncName.compare("g_str_has_prefix") ||
+ !FuncName.compare("g_str_has_suffix"));
+ isStrcmp &=
+ FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
+
+ bool isStrncmp = (!FuncName.compare("strncmp") ||
+ !FuncName.compare("xmlStrncmp") ||
+ !FuncName.compare("curl_strnequal") ||
+ !FuncName.compare("strncasecmp") ||
+ !FuncName.compare("strnicmp") ||
+ !FuncName.compare("ap_cstr_casecmpn") ||
+ !FuncName.compare("OPENSSL_strncasecmp") ||
+ !FuncName.compare("xmlStrncasecmp") ||
+ !FuncName.compare("g_ascii_strncasecmp") ||
+ !FuncName.compare("Curl_strncasecompare") ||
+ !FuncName.compare("g_strncasecmp"));
+ isStrncmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext()) &&
+ FT->getParamType(2)->isIntegerTy();
+
bool isGccStdStringStdString =
Callee->getName().find("__is_charIT_EE7__value") !=
std::string::npos &&
@@ -267,13 +404,19 @@ bool CmpLogRoutines::hookRtns(Module &M) {
*/
if (isGccStdStringCString || isGccStdStringStdString ||
- isLlvmStdStringStdString || isLlvmStdStringCString) {
+ isLlvmStdStringStdString || isLlvmStdStringCString || isMemcmp ||
+ isStrcmp || isStrncmp) {
- isPtrRtn = false;
+ isPtrRtnN = isPtrRtn = false;
}
+ if (isPtrRtnN) { isPtrRtn = false; }
+
if (isPtrRtn) { calls.push_back(callInst); }
+ if (isMemcmp || isPtrRtnN) { Memcmp.push_back(callInst); }
+ if (isStrcmp) { Strcmp.push_back(callInst); }
+ if (isStrncmp) { Strncmp.push_back(callInst); }
if (isGccStdStringStdString) { gccStdStd.push_back(callInst); }
if (isGccStdStringCString) { gccStdC.push_back(callInst); }
if (isLlvmStdStringStdString) { llvmStdStd.push_back(callInst); }
@@ -288,7 +431,8 @@ bool CmpLogRoutines::hookRtns(Module &M) {
}
if (!calls.size() && !gccStdStd.size() && !gccStdC.size() &&
- !llvmStdStd.size() && !llvmStdC.size())
+ !llvmStdStd.size() && !llvmStdC.size() && !Memcmp.size() &&
+ Strcmp.size() && Strncmp.size())
return false;
/*
@@ -323,6 +467,96 @@ bool CmpLogRoutines::hookRtns(Module &M) {
}
+ for (auto &callInst : Memcmp) {
+
+ Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1),
+ *v3P = callInst->getArgOperand(2);
+
+ IRBuilder<> IRB2(callInst->getParent());
+ IRB2.SetInsertPoint(callInst);
+
+ LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr);
+ CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+ auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null);
+ auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false);
+
+ IRBuilder<> IRB(ThenTerm);
+
+ std::vector<Value *> args;
+ Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+ Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+ Value * v3Pbitcast = IRB.CreateBitCast(
+ v3P, IntegerType::get(C, v3P->getType()->getPrimitiveSizeInBits()));
+ Value *v3Pcasted =
+ IRB.CreateIntCast(v3Pbitcast, IntegerType::get(C, 64), false);
+ args.push_back(v1Pcasted);
+ args.push_back(v2Pcasted);
+ args.push_back(v3Pcasted);
+
+ IRB.CreateCall(cmplogHookFnN, args);
+
+ // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+ }
+
+ for (auto &callInst : Strcmp) {
+
+ Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1);
+
+ IRBuilder<> IRB2(callInst->getParent());
+ IRB2.SetInsertPoint(callInst);
+
+ LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr);
+ CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+ auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null);
+ auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false);
+
+ IRBuilder<> IRB(ThenTerm);
+
+ std::vector<Value *> args;
+ Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+ Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+ args.push_back(v1Pcasted);
+ args.push_back(v2Pcasted);
+
+ IRB.CreateCall(cmplogHookFnStr, args);
+
+ // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+ }
+
+ for (auto &callInst : Strncmp) {
+
+ Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1),
+ *v3P = callInst->getArgOperand(2);
+
+ IRBuilder<> IRB2(callInst->getParent());
+ IRB2.SetInsertPoint(callInst);
+
+ LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr);
+ CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+ auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null);
+ auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false);
+
+ IRBuilder<> IRB(ThenTerm);
+
+ std::vector<Value *> args;
+ Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
+ Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
+ Value * v3Pbitcast = IRB.CreateBitCast(
+ v3P, IntegerType::get(C, v3P->getType()->getPrimitiveSizeInBits()));
+ Value *v3Pcasted =
+ IRB.CreateIntCast(v3Pbitcast, IntegerType::get(C, 64), false);
+ args.push_back(v1Pcasted);
+ args.push_back(v2Pcasted);
+ args.push_back(v3Pcasted);
+
+ IRB.CreateCall(cmplogHookFnStrN, args);
+
+ // errs() << callInst->getCalledFunction()->getName() << "\n";
+
+ }
+
for (auto &callInst : gccStdStd) {
Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1);
diff --git a/qemu_mode/QEMUAFL_VERSION b/qemu_mode/QEMUAFL_VERSION
index 7bdedf7b..0ffae35c 100644
--- a/qemu_mode/QEMUAFL_VERSION
+++ b/qemu_mode/QEMUAFL_VERSION
@@ -1 +1 @@
-71ed0d206f
+8809a2b2ebf089d3427dd8f6a0044bcc2e13b389
diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl
-Subproject 71ed0d206fd3d877420dceb4993a1011a4637ae
+Subproject 8809a2b2ebf089d3427dd8f6a0044bcc2e13b38
diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c
index 44b6c6f9..6320a26b 100644
--- a/src/afl-forkserver.c
+++ b/src/afl-forkserver.c
@@ -342,6 +342,16 @@ static void report_error_and_exit(int error) {
"the fuzzing target reports that the mmap() call to the shared "
"memory failed.");
break;
+ case FS_ERROR_OLD_CMPLOG:
+ FATAL(
+ "the -c cmplog target was instrumented with an too old afl++ "
+ "version, you need to recompile it.");
+ break;
+ case FS_ERROR_OLD_CMPLOG_QEMU:
+ FATAL(
+ "The AFL++ QEMU/FRIDA loaders are from an older version, for -c you "
+ "need to recompile it.\n");
+ break;
default:
FATAL("unknown error code %d from fuzzing target!", error);
@@ -663,6 +673,20 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
if ((status & FS_OPT_OLD_AFLPP_WORKAROUND) == FS_OPT_OLD_AFLPP_WORKAROUND)
status = (status & 0xf0ffffff);
+ if ((status & FS_OPT_NEWCMPLOG) == 0 && fsrv->cmplog_binary) {
+
+ if (fsrv->qemu_mode || fsrv->frida_mode) {
+
+ report_error_and_exit(FS_ERROR_OLD_CMPLOG_QEMU);
+
+ } else {
+
+ report_error_and_exit(FS_ERROR_OLD_CMPLOG);
+
+ }
+
+ }
+
if ((status & FS_OPT_SNAPSHOT) == FS_OPT_SNAPSHOT) {
fsrv->snapshot = 1;
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index a1134a22..f4d3b77f 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -448,11 +448,11 @@ u8 fuzz_one_original(afl_state_t *afl) {
ACTF(
"Fuzzing test case #%u (%u total, %llu uniq crashes found, "
- "perf_score=%0.0f, exec_us=%llu, hits=%u, map=%u)...",
+ "perf_score=%0.0f, exec_us=%llu, hits=%u, map=%u, ascii=%u)...",
afl->current_entry, afl->queued_paths, afl->unique_crashes,
afl->queue_cur->perf_score, afl->queue_cur->exec_us,
likely(afl->n_fuzz) ? afl->n_fuzz[afl->queue_cur->n_fuzz_entry] : 0,
- afl->queue_cur->bitmap_size);
+ afl->queue_cur->bitmap_size, afl->queue_cur->is_ascii);
fflush(stdout);
}
@@ -2003,11 +2003,16 @@ havoc_stage:
where we take the input file and make random stacked tweaks. */
#define MAX_HAVOC_ENTRY 59 /* 55 to 60 */
+#define MUTATE_ASCII_DICT 64
u32 r_max, r;
r_max = (MAX_HAVOC_ENTRY + 1) + (afl->extras_cnt ? 4 : 0) +
- (afl->a_extras_cnt ? 4 : 0);
+ (afl->a_extras_cnt
+ ? (unlikely(afl->cmplog_binary && afl->queue_cur->is_ascii)
+ ? MUTATE_ASCII_DICT
+ : 4)
+ : 0);
if (unlikely(afl->expand_havoc && afl->ready_for_splicing_count > 1)) {
@@ -2592,7 +2597,15 @@ havoc_stage:
if (afl->a_extras_cnt) {
- if (r < 2) {
+ u32 r_cmp = 2;
+
+ if (unlikely(afl->cmplog_binary && afl->queue_cur->is_ascii)) {
+
+ r_cmp = MUTATE_ASCII_DICT >> 1;
+
+ }
+
+ if (r < r_cmp) {
/* Use the dictionary. */
@@ -2612,7 +2625,7 @@ havoc_stage:
break;
- } else if (r < 4) {
+ } else if (r < (r_cmp << 1)) {
u32 use_extra = rand_below(afl, afl->a_extras_cnt);
u32 extra_len = afl->a_extras[use_extra].len;
@@ -2641,7 +2654,7 @@ havoc_stage:
} else {
- r -= 4;
+ r -= (r_cmp << 1);
}
diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c
index 33c2b561..1523d556 100644
--- a/src/afl-fuzz-queue.c
+++ b/src/afl-fuzz-queue.c
@@ -315,7 +315,96 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) {
}
-/* check if ascii or UTF-8 */
+/* check if pointer is ascii or UTF-8 */
+
+u8 check_if_text_buf(u8 *buf, u32 len) {
+
+ u32 offset = 0, ascii = 0, utf8 = 0;
+
+ while (offset < len) {
+
+ // ASCII: <= 0x7F to allow ASCII control characters
+ if ((buf[offset + 0] == 0x09 || buf[offset + 0] == 0x0A ||
+ buf[offset + 0] == 0x0D ||
+ (0x20 <= buf[offset + 0] && buf[offset + 0] <= 0x7E))) {
+
+ offset++;
+ utf8++;
+ ascii++;
+ continue;
+
+ }
+
+ if (isascii((int)buf[offset]) || isprint((int)buf[offset])) {
+
+ ascii++;
+ // we continue though as it can also be a valid utf8
+
+ }
+
+ // non-overlong 2-byte
+ if (len - offset > 1 &&
+ ((0xC2 <= buf[offset + 0] && buf[offset + 0] <= 0xDF) &&
+ (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF))) {
+
+ offset += 2;
+ utf8++;
+ continue;
+
+ }
+
+ // excluding overlongs
+ if ((len - offset > 2) &&
+ ((buf[offset + 0] == 0xE0 &&
+ (0xA0 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) &&
+ (0x80 <= buf[offset + 2] &&
+ buf[offset + 2] <= 0xBF)) || // straight 3-byte
+ (((0xE1 <= buf[offset + 0] && buf[offset + 0] <= 0xEC) ||
+ buf[offset + 0] == 0xEE || buf[offset + 0] == 0xEF) &&
+ (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) &&
+ (0x80 <= buf[offset + 2] &&
+ buf[offset + 2] <= 0xBF)) || // excluding surrogates
+ (buf[offset + 0] == 0xED &&
+ (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x9F) &&
+ (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF)))) {
+
+ offset += 3;
+ utf8++;
+ continue;
+
+ }
+
+ // planes 1-3
+ if ((len - offset > 3) &&
+ ((buf[offset + 0] == 0xF0 &&
+ (0x90 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) &&
+ (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) &&
+ (0x80 <= buf[offset + 3] &&
+ buf[offset + 3] <= 0xBF)) || // planes 4-15
+ ((0xF1 <= buf[offset + 0] && buf[offset + 0] <= 0xF3) &&
+ (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) &&
+ (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) &&
+ (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)) || // plane 16
+ (buf[offset + 0] == 0xF4 &&
+ (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x8F) &&
+ (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) &&
+ (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)))) {
+
+ offset += 4;
+ utf8++;
+ continue;
+
+ }
+
+ offset++;
+
+ }
+
+ return (utf8 > ascii ? utf8 : ascii);
+
+}
+
+/* check if queue entry is ascii or UTF-8 */
static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) {
diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c
index a1d6e021..0a6e5eee 100644
--- a/src/afl-fuzz-redqueen.c
+++ b/src/afl-fuzz-redqueen.c
@@ -45,6 +45,23 @@ enum {
};
+// add to dictionary enum
+// DEFAULT = 1, notTXT = 2, FOUND = 4, notSAME = 8
+enum {
+
+ DICT_ADD_NEVER = 0,
+ DICT_ADD_NOTFOUND_SAME_TXT = 1,
+ DICT_ADD_NOTFOUND_SAME = 3,
+ DICT_ADD_FOUND_SAME_TXT = 5,
+ DICT_ADD_FOUND_SAME = 7,
+ DICT_ADD_NOTFOUND_TXT = 9,
+ DICT_ADD_NOTFOUND = 11,
+ DICT_ADD_FOUND_TXT = 13,
+ DICT_ADD_FOUND = 15,
+ DICT_ADD_ANY = DICT_ADD_FOUND
+
+};
+
// CMPLOG LVL
enum {
@@ -54,6 +71,8 @@ enum {
};
+#define DICT_ADD_STRATEGY DICT_ADD_FOUND_SAME
+
struct range {
u32 start;
@@ -64,6 +83,10 @@ struct range {
};
+static u32 hshape;
+static u64 screen_update;
+static u64 last_update;
+
static struct range *add_range(struct range *ranges, u32 start, u32 end) {
struct range *r = ck_alloc_nozero(sizeof(struct range));
@@ -252,7 +275,6 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
u64 start_time = get_cur_time();
#endif
- u32 screen_update;
u64 orig_hit_cnt, new_hit_cnt, exec_cksum;
orig_hit_cnt = afl->queued_paths + afl->unique_crashes;
@@ -261,24 +283,6 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
afl->stage_max = (len << 1);
afl->stage_cur = 0;
- if (likely(afl->queue_cur->exec_us)) {
-
- if (likely((100000 / 2) >= afl->queue_cur->exec_us)) {
-
- screen_update = 100000 / afl->queue_cur->exec_us;
-
- } else {
-
- screen_update = 1;
-
- }
-
- } else {
-
- screen_update = 100000;
-
- }
-
// in colorization we do not classify counts, hence we have to calculate
// the original checksum.
if (unlikely(get_exec_checksum(afl, buf, len, &exec_cksum))) {
@@ -348,7 +352,7 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
}
- if (++afl->stage_cur % screen_update) { show_stats(afl); };
+ if (++afl->stage_cur % screen_update == 0) { show_stats(afl); };
}
@@ -440,10 +444,10 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
fprintf(
f,
"Colorization: fname=%s len=%u ms=%llu result=%u execs=%u found=%llu "
- "taint=%u\n",
+ "taint=%u ascii=%u auto_extra_before=%u\n",
afl->queue_cur->fname, len, get_cur_time() - start_time,
afl->queue_cur->colorized, afl->stage_cur, new_hit_cnt - orig_hit_cnt,
- positions);
+ positions, afl->queue_cur->is_ascii ? 1 : 0, afl->a_extras_cnt);
#ifndef _DEBUG
if (afl->not_on_tty) { fclose(f); }
@@ -759,11 +763,18 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
u32 its_len = MIN(len - idx, taint_len);
+ if (afl->fsrv.total_execs - last_update > screen_update) {
+
+ show_stats(afl);
+ last_update = afl->fsrv.total_execs;
+
+ }
+
// fprintf(stderr,
// "Encode: %llx->%llx into %llx(<-%llx) at idx=%u "
// "taint_len=%u shape=%u attr=%u\n",
// o_pattern, pattern, repl, changed_val, idx, taint_len,
- // h->shape + 1, attr);
+ // hshape, attr);
//#ifdef CMPLOG_SOLVE_TRANSFORM
// reverse atoi()/strnu?toll() is expensive, so we only to it in lvl 3
@@ -845,7 +856,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
u64 b_val, o_b_val, mask;
u8 bytes;
- switch (SHAPE_BYTES(h->shape)) {
+ switch (hshape) {
case 0:
case 1:
@@ -924,7 +935,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
s64 diff = pattern - b_val;
s64 o_diff = o_pattern - o_b_val;
/* fprintf(stderr, "DIFF1 idx=%03u shape=%02u %llx-%llx=%lx\n", idx,
- h->shape + 1, o_pattern, o_b_val, o_diff);
+ hshape, o_pattern, o_b_val, o_diff);
fprintf(stderr, "DIFF1 %016llx %llx-%llx=%lx\n", repl, pattern,
b_val, diff); */
if (diff == o_diff && diff) {
@@ -953,7 +964,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
s64 o_diff = o_pattern ^ o_b_val;
/* fprintf(stderr, "DIFF2 idx=%03u shape=%02u %llx-%llx=%lx\n",
- idx, h->shape + 1, o_pattern, o_b_val, o_diff);
+ idx, hshape, o_pattern, o_b_val, o_diff);
fprintf(stderr,
"DIFF2 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);
*/
@@ -1002,7 +1013,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
}
/* fprintf(stderr, "DIFF3 idx=%03u shape=%02u %llx-%llx=%lx\n",
- idx, h->shape + 1, o_pattern, o_b_val, o_diff);
+ idx, hshape, o_pattern, o_b_val, o_diff);
fprintf(stderr,
"DIFF3 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);
*/
@@ -1051,7 +1062,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
}
/* fprintf(stderr, "DIFF4 idx=%03u shape=%02u %llx-%llx=%lx\n",
- idx, h->shape + 1, o_pattern, o_b_val, o_diff);
+ idx, hshape, o_pattern, o_b_val, o_diff);
fprintf(stderr,
"DIFF4 %016llx %llx-%llx=%lx\n", repl, pattern, b_val, diff);
*/
@@ -1089,7 +1100,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
if ((lvl & LVL1) || attr >= IS_FP_MOD) {
- if (SHAPE_BYTES(h->shape) >= 8 && *status != 1) {
+ if (hshape >= 8 && *status != 1) {
// if (its_len >= 8)
// fprintf(stderr,
@@ -1132,7 +1143,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
}
- if (SHAPE_BYTES(h->shape) >= 4 && *status != 1) {
+ if (hshape >= 4 && *status != 1) {
// if (its_len >= 4 && (attr <= 1 || attr >= 8))
// fprintf(stderr,
@@ -1173,7 +1184,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
}
- if (SHAPE_BYTES(h->shape) >= 2 && *status != 1) {
+ if (hshape >= 2 && *status != 1) {
if (its_len >= 2 &&
((*buf_16 == (u16)pattern && *o_buf_16 == (u16)o_pattern) ||
@@ -1244,11 +1255,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
}
- if (!(attr & (IS_GREATER | IS_LESSER)) || SHAPE_BYTES(h->shape) < 4) {
-
- return 0;
-
- }
+ if (!(attr & (IS_GREATER | IS_LESSER)) || hshape < 4) { return 0; }
// transform >= to < and <= to >
if ((attr & IS_EQUAL) && (attr & (IS_GREATER | IS_LESSER))) {
@@ -1272,7 +1279,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
if (attr & IS_GREATER) {
- if (SHAPE_BYTES(h->shape) == 4 && its_len >= 4) {
+ if (hshape == 4 && its_len >= 4) {
float *f = (float *)&repl;
float g = *f;
@@ -1280,7 +1287,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
u32 *r = (u32 *)&g;
repl_new = (u32)*r;
- } else if (SHAPE_BYTES(h->shape) == 8 && its_len >= 8) {
+ } else if (hshape == 8 && its_len >= 8) {
double *f = (double *)&repl;
double g = *f;
@@ -1307,7 +1314,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
} else {
- if (SHAPE_BYTES(h->shape) == 4) {
+ if (hshape == 4) {
float *f = (float *)&repl;
float g = *f;
@@ -1315,7 +1322,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
u32 *r = (u32 *)&g;
repl_new = (u32)*r;
- } else if (SHAPE_BYTES(h->shape) == 8) {
+ } else if (hshape == 8) {
double *f = (double *)&repl;
double g = *f;
@@ -1342,7 +1349,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
}
// transform double to float, llvm likes to do that internally ...
- if (SHAPE_BYTES(h->shape) == 8 && its_len >= 4) {
+ if (hshape == 8 && its_len >= 4) {
double *f = (double *)&repl;
float g = (float)*f;
@@ -1353,7 +1360,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
memcpy(((char *)&repl_new) + 4, (char *)&g, 4);
#endif
changed_val = repl_new;
- h->shape = 3; // modify shape
+ hshape = 4; // modify shape
// fprintf(stderr, "DOUBLE2FLOAT %llx\n", repl_new);
@@ -1361,12 +1368,12 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
afl, h, pattern, repl_new, o_pattern, changed_val, 16, idx,
taint_len, orig_buf, buf, cbuf, len, 1, lvl, status))) {
- h->shape = 7; // recover shape
+ hshape = 8; // recover shape
return 1;
}
- h->shape = 7; // recover shape
+ hshape = 8; // recover shape
}
@@ -1421,6 +1428,13 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h,
u32 taint_len, u8 *orig_buf, u8 *buf, u8 *cbuf,
u32 len, u8 do_reverse, u8 lvl, u8 *status) {
+ if (afl->fsrv.total_execs - last_update > screen_update) {
+
+ show_stats(afl);
+ last_update = afl->fsrv.total_execs;
+
+ }
+
u8 *ptr = (u8 *)&buf[idx];
u8 *o_ptr = (u8 *)&orig_buf[idx];
u8 *p = (u8 *)&pattern;
@@ -1428,52 +1442,51 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h,
u8 *r = (u8 *)&repl;
u8 backup[16];
u32 its_len = MIN(len - idx, taint_len);
- u32 shape = h->shape + 1;
#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
size_t off = 0;
#else
- size_t off = 16 - shape;
+ size_t off = 16 - hshape;
#endif
- if (its_len >= shape) {
+ if (its_len >= hshape) {
#ifdef _DEBUG
fprintf(stderr, "TestUN: %u>=%u (len=%u idx=%u attr=%u off=%lu) (%u) ",
- its_len, shape, len, idx, attr, off, do_reverse);
+ its_len, hshape, len, idx, attr, off, do_reverse);
u32 i;
u8 *o_r = (u8 *)&changed_val;
- for (i = 0; i < shape; i++)
+ for (i = 0; i < hshape; i++)
fprintf(stderr, "%02x", ptr[i]);
fprintf(stderr, "==");
- for (i = 0; i < shape; i++)
+ for (i = 0; i < hshape; i++)
fprintf(stderr, "%02x", p[off + i]);
fprintf(stderr, " ");
- for (i = 0; i < shape; i++)
+ for (i = 0; i < hshape; i++)
fprintf(stderr, "%02x", o_ptr[i]);
fprintf(stderr, "==");
- for (i = 0; i < shape; i++)
+ for (i = 0; i < hshape; i++)
fprintf(stderr, "%02x", o_p[off + i]);
fprintf(stderr, " <= ");
- for (i = 0; i < shape; i++)
+ for (i = 0; i < hshape; i++)
fprintf(stderr, "%02x", r[off + i]);
fprintf(stderr, "<-");
- for (i = 0; i < shape; i++)
+ for (i = 0; i < hshape; i++)
fprintf(stderr, "%02x", o_r[off + i]);
fprintf(stderr, "\n");
#endif
- if (!memcmp(ptr, p + off, shape) && !memcmp(o_ptr, o_p + off, shape)) {
+ if (!memcmp(ptr, p + off, hshape) && !memcmp(o_ptr, o_p + off, hshape)) {
- memcpy(backup, ptr, shape);
- memcpy(ptr, r + off, shape);
+ memcpy(backup, ptr, hshape);
+ memcpy(ptr, r + off, hshape);
if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
#ifdef CMPLOG_COMBINE
- if (*status == 1) { memcpy(cbuf + idx, r, shape); }
+ if (*status == 1) { memcpy(cbuf + idx, r, hshape); }
#endif
- memcpy(ptr, backup, shape);
+ memcpy(ptr, backup, hshape);
#ifdef _DEBUG
fprintf(stderr, "Status=%u\n", *status);
@@ -1485,10 +1498,10 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h,
if (do_reverse && *status != 1) {
if (unlikely(cmp_extend_encodingN(
- afl, h, SWAPN(pattern, (shape << 3)), SWAPN(repl, (shape << 3)),
- SWAPN(o_pattern, (shape << 3)), SWAPN(changed_val, (shape << 3)),
- attr, idx, taint_len, orig_buf, buf, cbuf, len, 0, lvl,
- status))) {
+ afl, h, SWAPN(pattern, (hshape << 3)), SWAPN(repl, (hshape << 3)),
+ SWAPN(o_pattern, (hshape << 3)),
+ SWAPN(changed_val, (hshape << 3)), attr, idx, taint_len, orig_buf,
+ buf, cbuf, len, 0, lvl, status))) {
return 1;
@@ -1615,6 +1628,8 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
u8 s_v0_inc = 1, s_v1_inc = 1;
u8 s_v0_dec = 1, s_v1_dec = 1;
+ hshape = SHAPE_BYTES(h->shape);
+
if (h->hits > CMP_MAP_H) {
loggeds = CMP_MAP_H;
@@ -1626,7 +1641,7 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
}
#ifdef WORD_SIZE_64
- switch (SHAPE_BYTES(h->shape)) {
+ switch (hshape) {
case 1:
case 2:
@@ -1679,8 +1694,7 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
#ifdef _DEBUG
fprintf(stderr, "Handling: %llx->%llx vs %llx->%llx attr=%u shape=%u\n",
- orig_o->v0, o->v0, orig_o->v1, o->v1, h->attribute,
- SHAPE_BYTES(h->shape));
+ orig_o->v0, o->v0, orig_o->v1, o->v1, h->attribute, hshape);
#endif
t = taint;
@@ -1830,27 +1844,41 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
"END: %llx->%llx vs %llx->%llx attr=%u i=%u found=%u "
"isN=%u size=%u\n",
orig_o->v0, o->v0, orig_o->v1, o->v1, h->attribute, i, found_one,
- is_n, SHAPE_BYTES(h->shape));
+ is_n, hshape);
#endif
- // If failed, add to dictionary
- if (!found_one) {
+ // we only learn 16 bit +
+ if (hshape > 1) {
- if (afl->pass_stats[key].total == 0) {
+ if (!found_one || afl->queue_cur->is_ascii) {
#ifdef WORD_SIZE_64
if (unlikely(is_n)) {
- try_to_add_to_dictN(afl, s128_v0, SHAPE_BYTES(h->shape));
- try_to_add_to_dictN(afl, s128_v1, SHAPE_BYTES(h->shape));
+ if (!found_one ||
+ check_if_text_buf((u8 *)&s128_v0, SHAPE_BYTES(h->shape)) ==
+ SHAPE_BYTES(h->shape))
+ try_to_add_to_dictN(afl, s128_v0, SHAPE_BYTES(h->shape));
+ if (!found_one ||
+ check_if_text_buf((u8 *)&s128_v1, SHAPE_BYTES(h->shape)) ==
+ SHAPE_BYTES(h->shape))
+ try_to_add_to_dictN(afl, s128_v1, SHAPE_BYTES(h->shape));
} else
#endif
{
- try_to_add_to_dict(afl, o->v0, SHAPE_BYTES(h->shape));
- try_to_add_to_dict(afl, o->v1, SHAPE_BYTES(h->shape));
+ if (!memcmp((u8 *)&o->v0, (u8 *)&orig_o->v0, SHAPE_BYTES(h->shape)) &&
+ (!found_one ||
+ check_if_text_buf((u8 *)&o->v0, SHAPE_BYTES(h->shape)) ==
+ SHAPE_BYTES(h->shape)))
+ try_to_add_to_dict(afl, o->v0, SHAPE_BYTES(h->shape));
+ if (!memcmp((u8 *)&o->v1, (u8 *)&orig_o->v1, SHAPE_BYTES(h->shape)) &&
+ (!found_one ||
+ check_if_text_buf((u8 *)&o->v1, SHAPE_BYTES(h->shape)) ==
+ SHAPE_BYTES(h->shape)))
+ try_to_add_to_dict(afl, o->v1, SHAPE_BYTES(h->shape));
}
@@ -1882,8 +1910,9 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
}
-static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
- u8 *o_pattern, u8 *changed_val, u8 plen, u32 idx,
+static u8 rtn_extend_encoding(afl_state_t *afl, u8 entry,
+ struct cmpfn_operands *o,
+ struct cmpfn_operands *orig_o, u32 idx,
u32 taint_len, u8 *orig_buf, u8 *buf, u8 *cbuf,
u32 len, u8 lvl, u8 *status) {
@@ -1894,9 +1923,60 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
// (void)(changed_val);
//#endif
+ if (afl->fsrv.total_execs - last_update > screen_update) {
+
+ show_stats(afl);
+ last_update = afl->fsrv.total_execs;
+
+ }
+
+ u8 *pattern, *repl, *o_pattern, *changed_val;
+ u8 l0, l1, ol0, ol1;
+
+ if (entry == 0) {
+
+ pattern = o->v0;
+ repl = o->v1;
+ o_pattern = orig_o->v0;
+ changed_val = orig_o->v1;
+ l0 = o->v0_len;
+ ol0 = orig_o->v0_len;
+ l1 = o->v1_len;
+ ol1 = orig_o->v1_len;
+
+ } else {
+
+ pattern = o->v1;
+ repl = o->v0;
+ o_pattern = orig_o->v1;
+ changed_val = orig_o->v0;
+ l0 = o->v1_len;
+ ol0 = orig_o->v1_len;
+ l1 = o->v0_len;
+ ol1 = orig_o->v0_len;
+
+ }
+
+ if (l0 >= 0x80 || ol0 >= 0x80) {
+
+ l0 -= 0x80;
+ l1 -= 0x80;
+ ol0 -= 0x80;
+ ol1 -= 0x80;
+
+ }
+
+ if (l0 == 0 || l1 == 0 || ol0 == 0 || ol1 == 0 || l0 > 31 || l1 > 31 ||
+ ol0 > 31 || ol1 > 31) {
+
+ l0 = l1 = ol0 = ol1 = hshape;
+
+ }
+
+ u8 lmax = MAX(l0, ol0);
u8 save[40];
u32 saved_idx = idx, pre, from = 0, to = 0, i, j;
- u32 its_len = MIN((u32)plen, len - idx);
+ u32 its_len = MIN(MIN(lmax, hshape), len - idx);
its_len = MIN(its_len, taint_len);
u32 saved_its_len = its_len;
@@ -1912,7 +1992,8 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
(void)(j);
#ifdef _DEBUG
- fprintf(stderr, "RTN T idx=%u lvl=%02x ", idx, lvl);
+ fprintf(stderr, "RTN T idx=%u lvl=%02x is_txt=%u shape=%u/%u ", idx, lvl,
+ o->v0_len >= 0x80 ? 1 : 0, hshape, l0);
for (j = 0; j < 8; j++)
fprintf(stderr, "%02x", orig_buf[idx + j]);
fprintf(stderr, " -> ");
@@ -1972,10 +2053,10 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
}
- //#ifdef CMPLOG_SOLVE_TRANSFORM
-
if (*status == 1) return 0;
+ // transform solving
+
if (afl->cmplog_enable_transform && (lvl & LVL3)) {
u32 toupper = 0, tolower = 0, xor = 0, arith = 0, tohex = 0, fromhex = 0;
@@ -2322,6 +2403,8 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
u32 i, j, idx, have_taint = 1, taint_len, loggeds;
u8 status = 0, found_one = 0;
+ hshape = SHAPE_BYTES(h->shape);
+
if (h->hits > CMP_MAP_RTN_H) {
loggeds = CMP_MAP_RTN_H;
@@ -2353,18 +2436,22 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
}
/*
- struct cmp_header *hh = &afl->orig_cmp_map->headers[key];
- fprintf(stderr, "RTN N hits=%u id=%u shape=%u attr=%u v0=", h->hits,
- h->id, h->shape, h->attribute);
- for (j = 0; j < 8; j++) fprintf(stderr, "%02x", o->v0[j]);
- fprintf(stderr, " v1=");
- for (j = 0; j < 8; j++) fprintf(stderr, "%02x", o->v1[j]);
- fprintf(stderr, "\nRTN O hits=%u id=%u shape=%u attr=%u o0=",
- hh->hits, hh->id, hh->shape, hh->attribute);
- for (j = 0; j < 8; j++) fprintf(stderr, "%02x", orig_o->v0[j]);
- fprintf(stderr, " o1=");
- for (j = 0; j < 8; j++) fprintf(stderr, "%02x", orig_o->v1[j]);
- fprintf(stderr, "\n");
+ struct cmp_header *hh = &afl->orig_cmp_map->headers[key];
+ fprintf(stderr, "RTN N hits=%u id=%u shape=%u attr=%u v0=", h->hits, h->id,
+ hshape, h->attribute);
+ for (j = 0; j < 8; j++)
+ fprintf(stderr, "%02x", o->v0[j]);
+ fprintf(stderr, " v1=");
+ for (j = 0; j < 8; j++)
+ fprintf(stderr, "%02x", o->v1[j]);
+ fprintf(stderr, "\nRTN O hits=%u id=%u shape=%u attr=%u o0=", hh->hits,
+ hh->id, hshape, hh->attribute);
+ for (j = 0; j < 8; j++)
+ fprintf(stderr, "%02x", orig_o->v0[j]);
+ fprintf(stderr, " o1=");
+ for (j = 0; j < 8; j++)
+ fprintf(stderr, "%02x", orig_o->v1[j]);
+ fprintf(stderr, "\n");
*/
t = taint;
@@ -2400,25 +2487,24 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
#ifdef _DEBUG
int w;
- fprintf(stderr, "key=%u idx=%u len=%u o0=", key, idx,
- SHAPE_BYTES(h->shape));
- for (w = 0; w < SHAPE_BYTES(h->shape); ++w)
+ fprintf(stderr, "key=%u idx=%u len=%u o0=", key, idx, hshape);
+ for (w = 0; w < hshape; ++w)
fprintf(stderr, "%02x", orig_o->v0[w]);
fprintf(stderr, " v0=");
- for (w = 0; w < SHAPE_BYTES(h->shape); ++w)
+ for (w = 0; w < hshape; ++w)
fprintf(stderr, "%02x", o->v0[w]);
fprintf(stderr, " o1=");
- for (w = 0; w < SHAPE_BYTES(h->shape); ++w)
+ for (w = 0; w < hshape; ++w)
fprintf(stderr, "%02x", orig_o->v1[w]);
fprintf(stderr, " v1=");
- for (w = 0; w < SHAPE_BYTES(h->shape); ++w)
+ for (w = 0; w < hshape; ++w)
fprintf(stderr, "%02x", o->v1[w]);
fprintf(stderr, "\n");
#endif
- if (unlikely(rtn_extend_encoding(
- afl, o->v0, o->v1, orig_o->v0, orig_o->v1, SHAPE_BYTES(h->shape),
- idx, taint_len, orig_buf, buf, cbuf, len, lvl, &status))) {
+ if (unlikely(rtn_extend_encoding(afl, 0, o, orig_o, idx, taint_len,
+ orig_buf, buf, cbuf, len, lvl,
+ &status))) {
return 1;
@@ -2433,9 +2519,9 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
status = 0;
- if (unlikely(rtn_extend_encoding(
- afl, o->v1, o->v0, orig_o->v1, orig_o->v0, SHAPE_BYTES(h->shape),
- idx, taint_len, orig_buf, buf, cbuf, len, lvl, &status))) {
+ if (unlikely(rtn_extend_encoding(afl, 1, o, orig_o, idx, taint_len,
+ orig_buf, buf, cbuf, len, lvl,
+ &status))) {
return 1;
@@ -2450,16 +2536,42 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
}
- // If failed, add to dictionary
- if (!found_one && (lvl & LVL1)) {
+ // if (unlikely(!afl->pass_stats[key].total)) {
+
+ if ((!found_one && (lvl & LVL1)) || afl->queue_cur->is_ascii) {
+
+ // if (unlikely(!afl->pass_stats[key].total)) {
+
+ u32 shape_len = SHAPE_BYTES(h->shape);
+ u32 v0_len = shape_len, v1_len = shape_len;
+ if (afl->queue_cur->is_ascii ||
+ check_if_text_buf((u8 *)&o->v0, shape_len) == shape_len) {
+
+ if (strlen(o->v0)) v0_len = strlen(o->v0);
+
+ }
- if (unlikely(!afl->pass_stats[key].total)) {
+ if (afl->queue_cur->is_ascii ||
+ check_if_text_buf((u8 *)&o->v1, shape_len) == shape_len) {
- maybe_add_auto(afl, o->v0, SHAPE_BYTES(h->shape));
- maybe_add_auto(afl, o->v1, SHAPE_BYTES(h->shape));
+ if (strlen(o->v1)) v1_len = strlen(o->v1);
}
+ // fprintf(stderr, "SHOULD: found:%u ascii:%u text?%u:%u %u:%s %u:%s \n",
+ // found_one, afl->queue_cur->is_ascii, check_if_text_buf((u8 *)&o->v0,
+ // shape_len), check_if_text_buf((u8 *)&o->v1, shape_len), v0_len,
+ // o->v0, v1_len, o->v1);
+
+ if (!memcmp(o->v0, orig_o->v0, v0_len) ||
+ (!found_one || check_if_text_buf((u8 *)&o->v0, v0_len) == v0_len))
+ maybe_add_auto(afl, o->v0, v0_len);
+ if (!memcmp(o->v1, orig_o->v1, v1_len) ||
+ (!found_one || check_if_text_buf((u8 *)&o->v1, v1_len) == v1_len))
+ maybe_add_auto(afl, o->v1, v1_len);
+
+ //}
+
}
rtn_fuzz_next_iter:
@@ -2492,6 +2604,23 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) {
}
struct tainted *taint = NULL;
+ if (likely(afl->queue_cur->exec_us)) {
+
+ if (likely((100000 / 2) >= afl->queue_cur->exec_us)) {
+
+ screen_update = 100000 / afl->queue_cur->exec_us;
+
+ } else {
+
+ screen_update = 1;
+
+ }
+
+ } else {
+
+ screen_update = 100000;
+
+ }
if (!afl->queue_cur->taint || !afl->queue_cur->cmplog_colorinput) {
@@ -2592,8 +2721,6 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) {
u64 orig_hit_cnt, new_hit_cnt;
u64 orig_execs = afl->fsrv.total_execs;
orig_hit_cnt = afl->queued_paths + afl->unique_crashes;
- u64 screen_update = 100000 / afl->queue_cur->exec_us,
- execs = afl->fsrv.total_execs;
afl->stage_name = "input-to-state";
afl->stage_short = "its";
@@ -2630,11 +2757,13 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) {
if (afl->shm.cmp_map->headers[k].type == CMP_TYPE_INS) {
+ // fprintf(stderr, "INS %u\n", k);
afl->stage_max +=
MIN((u32)(afl->shm.cmp_map->headers[k].hits), (u32)CMP_MAP_H);
} else {
+ // fprintf(stderr, "RTN %u\n", k);
afl->stage_max +=
MIN((u32)(afl->shm.cmp_map->headers[k].hits), (u32)CMP_MAP_RTN_H);
@@ -2673,13 +2802,6 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) {
}
- if (afl->fsrv.total_execs - execs > screen_update) {
-
- execs = afl->fsrv.total_execs;
- show_stats(afl);
-
- }
-
}
r = 0;
@@ -2795,9 +2917,10 @@ exit_its:
if (f) {
fprintf(f,
- "Cmplog: fname=%s len=%u ms=%llu result=%u finds=%llu entries=%u\n",
+ "Cmplog: fname=%s len=%u ms=%llu result=%u finds=%llu entries=%u "
+ "auto_extra_after=%u\n",
afl->queue_cur->fname, len, get_cur_time() - start_time, r,
- new_hit_cnt - orig_hit_cnt, cmp_locations);
+ new_hit_cnt - orig_hit_cnt, cmp_locations, afl->a_extras_cnt);
#ifndef _DEBUG
if (afl->not_on_tty) { fclose(f); }
diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c
index 1d48a76c..808bf258 100644
--- a/src/afl-fuzz-stats.c
+++ b/src/afl-fuzz-stats.c
@@ -278,6 +278,7 @@ void write_stats_file(afl_state_t *afl, u32 t_bytes, double bitmap_cvg,
"total_edges : %u\n"
"var_byte_count : %u\n"
"havoc_expansion : %u\n"
+ "auto_dict_entries : %u\n"
"testcache_size : %llu\n"
"testcache_count : %u\n"
"testcache_evict : %u\n"
@@ -316,7 +317,7 @@ void write_stats_file(afl_state_t *afl, u32 t_bytes, double bitmap_cvg,
-1,
#endif
t_bytes, afl->fsrv.real_map_size, afl->var_byte_count,
- afl->expand_havoc, afl->q_testcase_cache_size,
+ afl->expand_havoc, afl->a_extras_cnt, afl->q_testcase_cache_size,
afl->q_testcase_cache_count, afl->q_testcase_evictions,
afl->use_banner, afl->unicorn_mode ? "unicorn" : "",
afl->fsrv.qemu_mode ? "qemu " : "",
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index a4093693..c08b8fbb 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -2235,13 +2235,12 @@ int main(int argc, char **argv_orig, char **envp) {
}
- write_bitmap(afl);
- save_auto(afl);
-
stop_fuzzing:
afl->force_ui_update = 1; // ensure the screen is reprinted
show_stats(afl); // print the screen one last time
+ write_bitmap(afl);
+ save_auto(afl);
SAYF(CURSOR_SHOW cLRD "\n\n+++ Testing aborted %s +++\n" cRST,
afl->stop_soon == 2 ? "programmatically" : "by user");
@@ -2270,6 +2269,20 @@ stop_fuzzing:
}
+ if (afl->not_on_tty) {
+
+ u32 t_bytes = count_non_255_bytes(afl, afl->virgin_bits);
+ u8 time_tmp[64];
+ u_stringify_time_diff(time_tmp, get_cur_time(), afl->start_time);
+ ACTF(
+ "Statistics: %u new paths found, %.02f%% coverage achieved, %llu "
+ "crashes found, %llu timeouts found, total runtime %s",
+ afl->queued_discovered,
+ ((double)t_bytes * 100) / afl->fsrv.real_map_size, afl->unique_crashes,
+ afl->unique_hangs, time_tmp);
+
+ }
+
#ifdef PROFILING
SAYF(cYEL "[!] " cRST
"Profiling information: %llu ms total work, %llu ns/run\n",
diff --git a/src/afl-performance.c b/src/afl-performance.c
index c6fa554b..04507410 100644
--- a/src/afl-performance.c
+++ b/src/afl-performance.c
@@ -90,7 +90,8 @@ inline u32 hash32(u8 *key, u32 len, u32 seed) {
#endif
- return (u32)XXH64(key, len, seed);
+ (void)seed;
+ return (u32)XXH3_64bits(key, len);
}
@@ -102,7 +103,8 @@ inline u64 hash64(u8 *key, u32 len, u64 seed) {
#endif
- return XXH64(key, len, seed);
+ (void)seed;
+ return XXH3_64bits(key, len);
}
diff --git a/test/test-cmplog.c b/test/test-cmplog.c
index b077e3ab..262df6bd 100644
--- a/test/test-cmplog.c
+++ b/test/test-cmplog.c
@@ -1,15 +1,13 @@
#include <stdio.h>
#include <string.h>
+#include <stdint.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
-int main(int argc, char *argv[]) {
- char buf[1024];
- ssize_t i;
- if ((i = read(0, buf, sizeof(buf) - 1)) < 24) return 0;
- buf[i] = 0;
+int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t i) {
+ if (i < 24) return 0;
if (buf[0] != 'A') return 0;
if (buf[1] != 'B') return 0;
if (buf[2] != 'C') return 0;
@@ -18,6 +16,17 @@ int main(int argc, char *argv[]) {
if (strncmp(buf + 12, "IJKL", 4) == 0 && strcmp(buf + 16, "DEADBEEF") == 0)
abort();
return 0;
-
}
+#ifdef __AFL_COMPILER
+int main(int argc, char *argv[]) {
+ unsigned char buf[1024];
+ ssize_t i;
+ while(__AFL_LOOP(1000)) {
+ i = read(0, (char*)buf, sizeof(buf) - 1);
+ if (i > 0) buf[i] = 0;
+ LLVMFuzzerTestOneInput(buf, i);
+ }
+ return 0;
+}
+#endif