Speed up tail processing when hashing aligned C strings

author John Naylor <john.naylor@postgresql.org>

Sun, 31 Mar 2024 05:19:16 +0000 (12:19 +0700)

committer John Naylor <john.naylor@postgresql.org>

Sun, 31 Mar 2024 05:19:16 +0000 (12:19 +0700)
author John Naylor <john.naylor@postgresql.org>
Sun, 31 Mar 2024 05:19:16 +0000 (12:19 +0700)
committer John Naylor <john.naylor@postgresql.org>
Sun, 31 Mar 2024 05:19:16 +0000 (12:19 +0700)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h

index 791750d136c48b4818a1c3f41b4681fa52b45b14..bd7323fe052bd18f33c6ecadc4c5c618bfe9668e 100644 (file)
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -219,8 +219,9 @@ static inline size_t
  fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
  {
         const char *const start = str;
-       size_t          remainder;
+       uint64          chunk;
         uint64          zero_byte_low;
+       uint64          mask;
  
         Assert(PointerIsAligned(start, uint64));
  
@@ -239,7 +240,7 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
          */
         for (;;)
         {
-               uint64          chunk = *(uint64 *) str;
+               chunk = *(uint64 *) str;
  
  #ifdef WORDS_BIGENDIAN
                 zero_byte_low = haszero64(pg_bswap64(chunk));
@@ -254,14 +255,37 @@ fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
                 str += FH_SIZEOF_ACCUM;
         }
  
-       /*
-        * The byte corresponding to the NUL will be 0x80, so the rightmost bit
-        * position will be in the range 7, 15, ..., 63. Turn this into byte
-        * position by dividing by 8.
-        */
-       remainder = pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
-       fasthash_accum(hs, str, remainder);
-       str += remainder;
+       if (zero_byte_low & 0xFF)
+       {
+               /*
+                * The next byte in the input is the NUL terminator, so we have
+                * nothing to do.
+                */
+       }
+       else
+       {
+               /*
+                * Create a mask for the remaining bytes so we can combine them into
+                * the hash. The mask also covers the NUL terminator, but that byte of
+                * the chunk is zero, so it's harmless. The mask could also contain
+                * 0x80 in bytes past the terminator, but only where the input byte is
+                * 0x00 or 0x01, and ANDing those with 0x80 yields zero.
+                */
+               mask = zero_byte_low | (zero_byte_low - 1);
+#ifdef WORDS_BIGENDIAN
+               /* mask was built from the byteswapped chunk, so swap it back */
+               mask = pg_bswap64(mask);
+#endif
+               hs->accum = chunk & mask;
+               fasthash_combine(hs);
+
+               /*
+                * The byte corresponding to the NUL will be 0x80, so the rightmost
+                * bit position will be in the range 15, 23, ..., 63. Turn this into
+                * byte position by dividing by 8.
+                */
+               str += pg_rightmost_one_pos64(zero_byte_low) / BITS_PER_BYTE;
+       }
  
         return str - start;
  }
author	John Naylor <john.naylor@postgresql.org>
	Sun, 31 Mar 2024 05:19:16 +0000 (12:19 +0700)
committer	John Naylor <john.naylor@postgresql.org>
	Sun, 31 Mar 2024 05:19:16 +0000 (12:19 +0700)