Introduce optimized routine for linear searches of arrays
author John Naylor <john.naylor@postgresql.org>
Wed, 3 Aug 2022 16:49:04 +0000 (09:49 -0700)
committer John Naylor <john.naylor@postgresql.org>
Wed, 10 Aug 2022 03:48:29 +0000 (10:48 +0700)
Use SSE2 intrinsics to speed up the search, where available.  Otherwise,
use a simple 'for' loop.  The motivation to add this now is to speed up
XidInMVCCSnapshot(), which is the reason only unsigned 32-bit integer
arrays are optimized. Other types are left for future work, as is the
extension of this technique to non-x86 platforms.

Nathan Bossart

Reviewed by: Andres Freund, Bharath Rupireddy, Masahiko Sawada
Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13

src/include/port/pg_lfind.h [new file with mode: 0644]
src/test/modules/Makefile
src/test/modules/test_lfind/.gitignore [new file with mode: 0644]
src/test/modules/test_lfind/Makefile [new file with mode: 0644]
src/test/modules/test_lfind/expected/test_lfind.out [new file with mode: 0644]
src/test/modules/test_lfind/sql/test_lfind.sql [new file with mode: 0644]
src/test/modules/test_lfind/test_lfind--1.0.sql [new file with mode: 0644]
src/test/modules/test_lfind/test_lfind.c [new file with mode: 0644]
src/test/modules/test_lfind/test_lfind.control [new file with mode: 0644]

diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h
new file mode 100644 (file)
index 0000000..fb12597
--- /dev/null
@@ -0,0 +1,103 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_lfind.h
+ *   Optimized linear search routines.
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *   src/include/port/pg_lfind.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_LFIND_H
+#define PG_LFIND_H
+
+#include "port/simd.h"
+
+/*
+ * pg_lfind32
+ *
+ * Return true if any of the first 'nelem' elements of 'base' equals 'key',
+ * otherwise return false.  'base' is only read, never written.
+ */
+static inline bool
+pg_lfind32(uint32 key, const uint32 *base, uint32 nelem)
+{
+   uint32      i = 0;
+
+   /* Use SIMD intrinsics where available. */
+#ifdef USE_SSE2
+
+   /*
+    * A 16-byte register only has four 4-byte lanes. For better
+    * instruction-level parallelism, each loop iteration operates on a block
+    * of four registers. Testing has shown this is ~40% faster than using a
+    * block of two registers.
+    */
+   const       __m128i keys = _mm_set1_epi32(key); /* load 4 copies of key */
+   uint32      tail_idx = nelem & ~0xF;    /* round down to multiple of 16 */
+
+#if defined(USE_ASSERT_CHECKING)
+   bool        assert_result = false;
+
+   /* scalar reference result, used only to cross-check the code below */
+   for (i = 0; i < nelem; i++)
+   {
+       if (key == base[i])
+       {
+           assert_result = true;
+           break;
+       }
+   }
+#endif
+
+   for (i = 0; i < tail_idx; i += 16)
+   {
+       /* load the next block into 4 registers holding 4 values each */
+       const       __m128i vals1 = _mm_loadu_si128((const __m128i *) &base[i]);
+       const       __m128i vals2 = _mm_loadu_si128((const __m128i *) &base[i + 4]);
+       const       __m128i vals3 = _mm_loadu_si128((const __m128i *) &base[i + 8]);
+       const       __m128i vals4 = _mm_loadu_si128((const __m128i *) &base[i + 12]);
+
+       /* compare each value to the key */
+       const       __m128i result1 = _mm_cmpeq_epi32(keys, vals1);
+       const       __m128i result2 = _mm_cmpeq_epi32(keys, vals2);
+       const       __m128i result3 = _mm_cmpeq_epi32(keys, vals3);
+       const       __m128i result4 = _mm_cmpeq_epi32(keys, vals4);
+
+       /* combine the results into a single variable */
+       const       __m128i tmp1 = _mm_or_si128(result1, result2);
+       const       __m128i tmp2 = _mm_or_si128(result3, result4);
+       const       __m128i result = _mm_or_si128(tmp1, tmp2);
+
+       /* a nonzero mask means at least one of the 16 values matched */
+       if (_mm_movemask_epi8(result) != 0)
+       {
+#if defined(USE_ASSERT_CHECKING)
+           Assert(assert_result == true);
+#endif
+           return true;
+       }
+   }
+#endif                         /* USE_SSE2 */
+
+   /* Process the remaining elements (all of them without SSE2) one by one. */
+   for (; i < nelem; i++)
+   {
+       if (key == base[i])
+       {
+#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING)
+           Assert(assert_result == true);
+#endif
+           return true;
+       }
+   }
+
+#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING)
+   Assert(assert_result == false);
+#endif
+   return false;
+}
+
+#endif                         /* PG_LFIND_H */
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 9090226daa070eb618ea299172b1600ab0e6a2a1..6c31c8707c24dbdfb813adec7043af31eb2aaa89 100644 (file)
@@ -19,6 +19,7 @@ SUBDIRS = \
          test_extensions \
          test_ginpostinglist \
          test_integerset \
+         test_lfind \
          test_misc \
          test_oat_hooks \
          test_parser \
diff --git a/src/test/modules/test_lfind/.gitignore b/src/test/modules/test_lfind/.gitignore
new file mode 100644 (file)
index 0000000..5dcb3ff
--- /dev/null
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/src/test/modules/test_lfind/Makefile b/src/test/modules/test_lfind/Makefile
new file mode 100644 (file)
index 0000000..00ba56f
--- /dev/null
@@ -0,0 +1,23 @@
+# src/test/modules/test_lfind/Makefile
+
+MODULE_big = test_lfind
+OBJS = \
+   $(WIN32RES) \
+   test_lfind.o
+PGFILEDESC = "test_lfind - test code for optimized linear search functions"
+
+EXTENSION = test_lfind
+DATA = test_lfind--1.0.sql
+
+REGRESS = test_lfind
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/test_lfind
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/test_lfind/expected/test_lfind.out b/src/test/modules/test_lfind/expected/test_lfind.out
new file mode 100644 (file)
index 0000000..222c8fd
--- /dev/null
@@ -0,0 +1,12 @@
+CREATE EXTENSION test_lfind;
+--
+-- These tests don't produce any interesting output.  We're checking that
+-- the operations complete without crashing or hanging and that none of their
+-- internal sanity tests fail.
+--
+SELECT test_lfind();
+ test_lfind 
+------------
+(1 row)
+
diff --git a/src/test/modules/test_lfind/sql/test_lfind.sql b/src/test/modules/test_lfind/sql/test_lfind.sql
new file mode 100644 (file)
index 0000000..899f1dd
--- /dev/null
@@ -0,0 +1,8 @@
+CREATE EXTENSION test_lfind;
+
+--
+-- These tests don't produce any interesting output.  We're checking that
+-- the operations complete without crashing or hanging and that none of their
+-- internal sanity tests fail.
+--
+SELECT test_lfind();
diff --git a/src/test/modules/test_lfind/test_lfind--1.0.sql b/src/test/modules/test_lfind/test_lfind--1.0.sql
new file mode 100644 (file)
index 0000000..d82ab05
--- /dev/null
@@ -0,0 +1,8 @@
+/* src/test/modules/test_lfind/test_lfind--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_lfind" to load this file. \quit
+
+CREATE FUNCTION test_lfind()
+   RETURNS pg_catalog.void
+   AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/src/test/modules/test_lfind/test_lfind.c b/src/test/modules/test_lfind/test_lfind.c
new file mode 100644 (file)
index 0000000..a000746
--- /dev/null
@@ -0,0 +1,52 @@
+/*--------------------------------------------------------------------------
+ *
+ * test_lfind.c
+ *     Test correctness of optimized linear search functions.
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *     src/test/modules/test_lfind/test_lfind.c
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "port/pg_lfind.h"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(test_lfind);
+
+Datum
+test_lfind(PG_FUNCTION_ARGS)
+{
+#define TEST_ARRAY_SIZE 135
+   /* each row: key, index where it is stored, shorter length excluding it */
+   static const uint32 probes[][3] = {
+       {1, 8, 4}, {2, 64, 32}, {3, TEST_ARRAY_SIZE - 1, 96}
+   };
+   uint32      haystack[TEST_ARRAY_SIZE] = {0};
+   uint32      p;
+
+   /* plant every key before running any search */
+   for (p = 0; p < sizeof(probes) / sizeof(probes[0]); p++)
+       haystack[probes[p][1]] = probes[p][0];
+
+   for (p = 0; p < sizeof(probes) / sizeof(probes[0]); p++)
+   {
+       /* the truncated search must miss, the full-length search must hit */
+       if (pg_lfind32(probes[p][0], haystack, probes[p][2]))
+           elog(ERROR, "pg_lfind32() found nonexistent element");
+       if (!pg_lfind32(probes[p][0], haystack, TEST_ARRAY_SIZE))
+           elog(ERROR, "pg_lfind32() did not find existing element");
+   }
+
+   /* 4 was never stored anywhere in the array */
+   if (pg_lfind32(4, haystack, TEST_ARRAY_SIZE))
+       elog(ERROR, "pg_lfind32() found nonexistent element");
+
+   PG_RETURN_VOID();
+}
diff --git a/src/test/modules/test_lfind/test_lfind.control b/src/test/modules/test_lfind/test_lfind.control
new file mode 100644 (file)
index 0000000..d8b57df
--- /dev/null
@@ -0,0 +1,4 @@
+comment = 'Test code for optimized linear search functions'
+default_version = '1.0'
+module_pathname = '$libdir/test_lfind'
+relocatable = true