From c7cfa2349ae52f7688ee175b7bd427a57d497f42 Mon Sep 17 00:00:00 2001 From: prvyk Date: Sun, 2 Feb 2025 21:12:50 +0200 Subject: [PATCH 1/7] Add fnv hash for benchmarking comparison --- .../Tsavorite/cs/src/core/Utilities/Utility.cs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs index a639355487..b25712d19d 100644 --- a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs +++ b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs @@ -178,21 +178,19 @@ public static long GetHashCode(long input) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe long HashBytes(byte* pbString, int len) { - const long magicno = 40343; + ulong _hash = 0xCBF29CE484222325UL; char* pwString = (char*)pbString; - int cbBuf = len / 2; - ulong hashState = (ulong)len; - for (int i = 0; i < cbBuf; i++, pwString++) - hashState = magicno * hashState + *pwString; - - if ((len & 1) > 0) + for (int i = 0; i < len; i++, pwString++) { - byte* pC = (byte*)pwString; - hashState = magicno * hashState + *pC; + unchecked + { + _hash ^= *pwString; + _hash *= 0x00000100000001B3UL; + } } - return (long)Rotr64(magicno * hashState, 4); + return (long)_hash; } /// From 48a49806472662c1c478788191723e09d1e14bb2 Mon Sep 17 00:00:00 2001 From: prvyk Date: Sun, 2 Feb 2025 21:50:43 +0200 Subject: [PATCH 2/7] Another attempt, this time just with adding unchecked. 
--- .../cs/src/core/Utilities/Utility.cs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs index b25712d19d..6cc3965b08 100644 --- a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs +++ b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs @@ -178,19 +178,24 @@ public static long GetHashCode(long input) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe long HashBytes(byte* pbString, int len) { - ulong _hash = 0xCBF29CE484222325UL; + const long magicno = 40343; char* pwString = (char*)pbString; + int cbBuf = len / 2; + ulong hashState = (ulong)len; - for (int i = 0; i < len; i++, pwString++) + unchecked { - unchecked + for (int i = 0; i < cbBuf; i++, pwString++) + hashState = magicno * hashState + *pwString; + + if ((len & 1) > 0) { - _hash ^= *pwString; - _hash *= 0x00000100000001B3UL; + byte* pC = (byte*)pwString; + hashState = magicno * hashState + *pC; } - } - return (long)_hash; + return (long)Rotr64(magicno * hashState, 4); + } } /// From 1995394b08d1533bdcc923ac025c751ec8311887 Mon Sep 17 00:00:00 2001 From: prvyk Date: Sun, 2 Feb 2025 22:28:08 +0200 Subject: [PATCH 3/7] Implement FNV-32 right just for testing - this seems slower than the current function. 
--- .../Tsavorite/cs/src/core/Utilities/Utility.cs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs index 6cc3965b08..745bd7d1a6 100644 --- a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs +++ b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs @@ -178,24 +178,26 @@ public static long GetHashCode(long input) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe long HashBytes(byte* pbString, int len) { - const long magicno = 40343; - char* pwString = (char*)pbString; + ulong _hash = 0x811C9DC5U; + var pwString = pbString; int cbBuf = len / 2; - ulong hashState = (ulong)len; unchecked { for (int i = 0; i < cbBuf; i++, pwString++) - hashState = magicno * hashState + *pwString; + { + _hash ^= *pwString; + _hash *= 0x01000193U; + } if ((len & 1) > 0) { byte* pC = (byte*)pwString; - hashState = magicno * hashState + *pC; + _hash = 40343 * _hash + *pC; } - - return (long)Rotr64(magicno * hashState, 4); } + + return (long)_hash; } /// From 5154f03e45b345096e91b3ccf58055aaab60b713 Mon Sep 17 00:00:00 2001 From: prvyk Date: Sun, 2 Feb 2025 23:53:54 +0200 Subject: [PATCH 4/7] CityHash64 Sometimes faster, sometimes slower, overall a tiny bit slower compared to unchecked function. --- .../cs/src/core/Utilities/Utility.cs | 117 ++++++++++++++++-- 1 file changed, 106 insertions(+), 11 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs index 745bd7d1a6..90f55fc5ae 100644 --- a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs +++ b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. 
using System; +using System.Buffers.Binary; using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; @@ -178,26 +179,120 @@ public static long GetHashCode(long input) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe long HashBytes(byte* pbString, int len) { - ulong _hash = 0x811C9DC5U; - var pwString = pbString; - int cbBuf = len / 2; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + ulong RotateRight(ulong operand, int shiftCount) + { + shiftCount &= 0x3f; + + return + (operand >> shiftCount) | + (operand << (64 - shiftCount)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + ulong ReverseByteOrder(ulong operand) + { + return + (operand >> 56) | + ((operand & 0x00ff000000000000) >> 40) | + ((operand & 0x0000ff0000000000) >> 24) | + ((operand & 0x000000ff00000000) >> 8) | + ((operand & 0x00000000ff000000) << 8) | + ((operand & 0x0000000000ff0000) << 24) | + ((operand & 0x000000000000ff00) << 40) | + (operand << 56); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + ulong Mix(ulong value) => + value ^ (value >> 47); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + long Hash64Len16(ulong u, ulong v, ulong mul) + { + ulong a = (u ^ v) * mul; + a ^= (a >> 47); + + ulong b = (v ^ a) * mul; + b ^= (b >> 47); + b *= mul; + + return (long)b; + } + + const ulong K0 = 0xc3a5c85c97cb3127; + const ulong K1 = 0xb492b66fbe98f273; + const ulong K2 = 0x9ae16a3b2f90404f; unchecked { - for (int i = 0; i < cbBuf; i++, pwString++) + if (len > 32) + { + ulong mul = K2 + (ulong)len * 2; + ulong a = Unsafe.Read(pbString) * K2; + ulong b = Unsafe.Read(pbString + 8); + ulong c = Unsafe.Read(pbString + len - 24); + ulong d = Unsafe.Read(pbString + len - 32); + ulong e = Unsafe.Read(pbString + 16) * K2; + ulong f = Unsafe.Read(pbString + 24) * 9; + ulong g = Unsafe.Read(pbString + len - 8); + ulong h = Unsafe.Read(pbString + len - 16) * mul; + + ulong u = RotateRight(a + g, 43) + (RotateRight(b, 30) + 
c) * 9; + ulong v = ((a + g) ^ d) + f + 1; + ulong w = ReverseByteOrder((u + v) * mul) + h; + ulong x = RotateRight(e + f, 42) + c; + ulong y = (ReverseByteOrder((v + w) * mul) + g) * mul; + ulong z = e + f + c; + + a = ReverseByteOrder((x + z) * mul + y) + b; + b = Mix((z + a) * mul + d + h) * mul; + return (long)(b + x); + } + else if (len > 16) { - _hash ^= *pwString; - _hash *= 0x01000193U; + ulong mul = K2 + (ulong)len * 2; + ulong a = Unsafe.Read(pbString) * K1; + ulong b = Unsafe.Read(pbString + 8); + ulong c = Unsafe.Read(pbString + len - 8) * mul; + ulong d = Unsafe.Read(pbString + len - 16) * K2; + + return (long)Hash64Len16( + RotateRight(a + b, 43) + + RotateRight(c, 30) + d, + a + RotateRight(b + K2, 18) + c, + mul); } + else if (len >= 8) + { + ulong mul = K2 + (ulong)len * 2; + ulong a = Unsafe.Read(pbString) + K2; + ulong b = Unsafe.Read(pbString + len - 8); + ulong c = RotateRight(b, 37) * mul + a; + ulong d = (RotateRight(a, 25) + b) * mul; - if ((len & 1) > 0) + return Hash64Len16(c, d, mul); + } + else if (len >= 4) { - byte* pC = (byte*)pwString; - _hash = 40343 * _hash + *pC; + ulong mul = K2 + (ulong)len * 2; + ulong a = Unsafe.Read(pbString); + return Hash64Len16((ulong)len + (a << 3), Unsafe.Read(pbString + len - 4), mul); } - } + else if (len > 0) + { + byte a = pbString[0]; + byte b = pbString[0 + (len >> 1)]; + byte c = pbString[len - 1]; + + uint y = (uint)a + ((uint)b << 8); + uint z = (uint)len + ((uint)c << 2); - return (long)_hash; + return (long)(Mix((ulong)(y * K2 ^ z * K0)) * K2); + } + + return (long)K2; + } } /// From f1b6d52755d351459edd36cd85020eda9aeda159 Mon Sep 17 00:00:00 2001 From: prvyk Date: Sun, 2 Feb 2025 23:57:04 +0200 Subject: [PATCH 5/7] dotnet format --- libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs index 
90f55fc5ae..fde805ba2d 100644 --- a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs +++ b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs @@ -219,7 +219,7 @@ long Hash64Len16(ulong u, ulong v, ulong mul) return (long)b; } - + const ulong K0 = 0xc3a5c85c97cb3127; const ulong K1 = 0xb492b66fbe98f273; const ulong K2 = 0x9ae16a3b2f90404f; From f7d5face94cb3ff1597599405cbc928bad13c0be Mon Sep 17 00:00:00 2001 From: prvyk Date: Mon, 3 Feb 2025 00:33:18 +0200 Subject: [PATCH 6/7] Forgot the >64 case. --- .../cs/src/core/Utilities/Utility.cs | 100 +++++++++++++++++- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs index fde805ba2d..de5b58425e 100644 --- a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs +++ b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs @@ -12,6 +12,18 @@ namespace Tsavorite.core { + internal struct local_UInt128 + { + public ulong Low { get; set; } + public ulong High { get; set; } + + public local_UInt128(ulong low, ulong high) + { + Low = low; + High = high; + } + } + /// /// Empty type /// @@ -169,7 +181,7 @@ public static long GetHashCode(long input) return (long)Rotr64((ulong)local_rand_hash, 45); } - + /// /// Get 64-bit hash code for a byte array /// @@ -220,13 +232,97 @@ long Hash64Len16(ulong u, ulong v, ulong mul) return (long)b; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + ulong Hash128to64(local_UInt128 x) + { + const ulong kMul = 0x9ddfea08eb382d69; + + ulong a = (x.Low ^ x.High) * kMul; + a ^= (a >> 47); + + ulong b = (x.High ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + + return b; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + ulong _Hash64Len16(ulong u, ulong v) + { + return Hash128to64( + new local_UInt128(u, v)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + local_UInt128 _WeakHashLen32WithSeeds(ulong w, ulong x, ulong y, ulong z, ulong 
a, ulong b) + { + a += w; + b = RotateRight(b + a + z, 21); + + ulong c = a; + a += x; + a += y; + + b += RotateRight(a, 44); + + return new local_UInt128(a + z, b + c); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + local_UInt128 WeakHashLen32WithSeeds(byte* data, int startIndex, UInt64 a, UInt64 b) + { + return _WeakHashLen32WithSeeds( + Unsafe.Read(data + startIndex), + Unsafe.Read(data + startIndex + 8), + Unsafe.Read(data + startIndex + 16), + Unsafe.Read(data + startIndex + 24), + a, + b); + } + const ulong K0 = 0xc3a5c85c97cb3127; const ulong K1 = 0xb492b66fbe98f273; const ulong K2 = 0x9ae16a3b2f90404f; unchecked { - if (len > 32) + if (len > 64) + { + ulong x = Unsafe.Read(pbString + len - 40); + ulong y = Unsafe.Read(pbString + len - 16) + Unsafe.Read(pbString + len - 56); + ulong z = _Hash64Len16( + Unsafe.Read(pbString + len - 48) + (ulong)len, + Unsafe.Read(pbString + len - 24)); + + local_UInt128 v = WeakHashLen32WithSeeds(pbString, len - 64, (ulong)len, z); + local_UInt128 w = WeakHashLen32WithSeeds(pbString, len - 32, y + K1, x); + + x = x * K1 + Unsafe.Read(pbString); + + // For each 64-byte chunk + var grouplen = 0 + (len - (len % 64)); + + for (var currentOffset = 0; currentOffset < grouplen; currentOffset += 64) + { + x = RotateRight(x + y + v.Low + Unsafe.Read(pbString + currentOffset + 8), 37) * K1; + y = RotateRight(y + v.High + Unsafe.Read(pbString + currentOffset + 48), 42) * K1; + x ^= w.High; + y += v.Low + Unsafe.Read(pbString + currentOffset + 40); + z = RotateRight(z + w.Low, 33) * K1; + v = WeakHashLen32WithSeeds(pbString, currentOffset, v.High * K1, x + w.Low); + w = WeakHashLen32WithSeeds(pbString, currentOffset + 32, z + w.High, y + + Unsafe.Read(pbString + currentOffset + 16)); + + ulong temp = x; + x = z; + z = temp; + } + + return (long)_Hash64Len16(_Hash64Len16(v.Low, w.Low) + Mix(y) * K1 + z, + _Hash64Len16(v.High, w.High) + x); + } + else if (len > 32) { ulong mul = K2 + (ulong)len * 2; ulong a = 
Unsafe.Read(pbString) * K2; From 41fd8eaa9c8834b4dd765b7c962c0218e4010cd7 Mon Sep 17 00:00:00 2001 From: prvyk Date: Mon, 3 Feb 2025 00:36:41 +0200 Subject: [PATCH 7/7] dotnet format --- libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs index de5b58425e..8583f67e67 100644 --- a/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs +++ b/libs/storage/Tsavorite/cs/src/core/Utilities/Utility.cs @@ -181,7 +181,7 @@ public static long GetHashCode(long input) return (long)Rotr64((ulong)local_rand_hash, 45); } - + /// /// Get 64-bit hash code for a byte array ///