Teach fasthash_accum to use platform endianness for bytewise loads
author John Naylor <john.naylor@postgresql.org>
Sat, 6 Apr 2024 09:59:28 +0000 (16:59 +0700)
committer John Naylor <john.naylor@postgresql.org>
Sat, 6 Apr 2024 10:14:28 +0000 (17:14 +0700)
This function previously used a mix of word-wise loads and bytewise
loads. The bytewise loads happened to be little-endian regardless of
platform. This in itself is not a problem. However, a future commit
will require the same result whether A) the input is loaded as a
word with the relevant bytes masked off, or B) the input is loaded
one byte at a time.
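
To illustrate the required equivalence, here is a standalone sketch (not
part of this commit; the helpers load_masked_word and load_bytewise are
hypothetical names used only for illustration). It checks that loading a
chunk as a native word and masking off the trailing bytes gives the same
value as assembling the word one byte at a time with platform-endian
shifts. WORDS_BIGENDIAN is the macro the PostgreSQL build defines on
big-endian platforms; a standalone compile without it exercises the
little-endian branch.

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Load a full 8-byte word, then zero the bytes past 'len' (1..8). */
    static uint64_t
    load_masked_word(const char *k, size_t len)
    {
        uint64_t    word = 0;

        memcpy(&word, k, sizeof(word));
    #ifdef WORDS_BIGENDIAN
        /* first input byte is the most significant byte */
        word &= ~UINT64_C(0) << (8 * (8 - len));
    #else
        /* first input byte is the least significant byte */
        word &= ~UINT64_C(0) >> (8 * (8 - len));
    #endif
        return word;
    }

    /* Build the same word one byte at a time, matching native byte order. */
    static uint64_t
    load_bytewise(const char *k, size_t len)
    {
        uint64_t    word = 0;

        for (size_t i = 0; i < len; i++)
        {
    #ifdef WORDS_BIGENDIAN
            word |= (uint64_t) (unsigned char) k[i] << (8 * (7 - i));
    #else
            word |= (uint64_t) (unsigned char) k[i] << (8 * i);
    #endif
        }
        return word;
    }

    int
    main(void)
    {
        const char  buf[] = "abcdefgh";     /* 8 data bytes plus NUL */

        for (size_t len = 1; len <= 8; len++)
            assert(load_masked_word(buf, len) == load_bytewise(buf, len));
        return 0;
    }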

While at it, improve debuggability of the internal hash state.

Discussion: https://postgr.es/m/CANWCAZZpuV1mES1mtSpAq8tWJewbrv4gEz6R_k4gzNG8GZ5gag%40mail.gmail.com

src/include/common/hashfn_unstable.h

index c93c9d717924143484d5bae1fed4e536dee61e17..0263b653dda4df90790fbb18d91a647941bb6e7d 100644 (file)
@@ -131,9 +131,6 @@ fasthash_combine(fasthash_state *hs)
 {
        hs->hash ^= fasthash_mix(hs->accum, 0);
        hs->hash *= 0x880355f21e6d1965;
-
-       /* reset hash state for next input */
-       hs->accum = 0;
 }
 
 /* accumulate up to 8 bytes of input and combine it into the hash */
@@ -142,9 +139,44 @@ fasthash_accum(fasthash_state *hs, const char *k, size_t len)
 {
        uint32          lower_four;
 
-       Assert(hs->accum == 0);
        Assert(len <= FH_SIZEOF_ACCUM);
+       hs->accum = 0;
 
+       /*
+        * For consistency, bytewise loads must match the platform's endianness.
+        */
+#ifdef WORDS_BIGENDIAN
+       switch (len)
+       {
+               case 8:
+                       memcpy(&hs->accum, k, 8);
+                       break;
+               case 7:
+                       hs->accum |= (uint64) k[6] << 8;
+                       /* FALLTHROUGH */
+               case 6:
+                       hs->accum |= (uint64) k[5] << 16;
+                       /* FALLTHROUGH */
+               case 5:
+                       hs->accum |= (uint64) k[4] << 24;
+                       /* FALLTHROUGH */
+               case 4:
+                       memcpy(&lower_four, k, sizeof(lower_four));
+                       hs->accum |= (uint64) lower_four << 32;
+                       break;
+               case 3:
+                       hs->accum |= (uint64) k[2] << 40;
+                       /* FALLTHROUGH */
+               case 2:
+                       hs->accum |= (uint64) k[1] << 48;
+                       /* FALLTHROUGH */
+               case 1:
+                       hs->accum |= (uint64) k[0] << 56;
+                       break;
+               case 0:
+                       return;
+       }
+#else
        switch (len)
        {
                case 8:
@@ -175,6 +207,7 @@ fasthash_accum(fasthash_state *hs, const char *k, size_t len)
                case 0:
                        return;
        }
+#endif
 
        fasthash_combine(hs);
 }
@@ -288,7 +321,8 @@ fasthash_accum_cstring(fasthash_state *hs, const char *str)
        if (PointerIsAligned(str, uint64))
        {
                len = fasthash_accum_cstring_aligned(hs, str);
-               Assert(hs_check.hash == hs->hash && len_check == len);
+               Assert(len_check == len);
+               Assert(hs_check.hash == hs->hash);
                return len;
        }
 #endif                                                 /* SIZEOF_VOID_P */