summaryrefslogtreecommitdiff
path: root/src/port
diff options
context:
space:
mode:
authorJohn Naylor2023-08-10 04:36:15 +0000
committerJohn Naylor2023-08-10 04:36:15 +0000
commit4d14ccd6af6e788a7b79ff3ed77bda5bc71d2edc (patch)
treebef29664090f4cad6db22352d3c8a401bf724c27 /src/port
parentfa2e874946c5b9f23394358c131e987df7cc8ffb (diff)
Use native CRC instructions on 64-bit LoongArch
As with the Intel and Arm CRC instructions, compiler intrinsics for them must be supported by the compiler. In contrast, no runtime check is needed. Aligned memory access is faster, so use the Arm coding as a model. -- YANG Xudong. Discussion: https://postgr.es/m/b522a0c5-e3b2-99cc-6387-58134fb88cbe%40ymatrix.cn
Diffstat (limited to 'src/port')
-rw-r--r--src/port/meson.build3
-rw-r--r--src/port/pg_crc32c_loongarch.c73
2 files changed, 76 insertions, 0 deletions
diff --git a/src/port/meson.build b/src/port/meson.build
index 9d0cd93c438..deb354418db 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -92,6 +92,9 @@ replace_funcs_pos = [
['pg_crc32c_armv8_choose', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
['pg_crc32c_sb8', 'USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK'],
+ # loongarch
+ ['pg_crc32c_loongarch', 'USE_LOONGARCH_CRC32C'],
+
# generic fallback
['pg_crc32c_sb8', 'USE_SLICING_BY_8_CRC32C'],
]
diff --git a/src/port/pg_crc32c_loongarch.c b/src/port/pg_crc32c_loongarch.c
new file mode 100644
index 00000000000..db9da80e1bf
--- /dev/null
+++ b/src/port/pg_crc32c_loongarch.c
@@ -0,0 +1,73 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_crc32c_loongarch.c
+ * Compute CRC-32C checksum using LoongArch CRCC instructions
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/port/pg_crc32c_loongarch.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "port/pg_crc32c.h"
+
+/*
+ * Return 'crc' updated with the CRC-32C of the 'len' bytes at 'data'.
+ */
+pg_crc32c
+pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len)
+{
+	const unsigned char *p = data;
+	const unsigned char *pend = p + len;
+
+	/*
+	 * LoongArch doesn't require alignment, but aligned memory access is
+	 * significantly faster, so consume leading bytes until p is eight-byte
+	 * aligned.  Test "pend - p" so no pointer past pend is ever computed.
+	 */
+	if (!PointerIsAligned(p, uint16) && pend - p >= 1)
+	{
+		crc = __builtin_loongarch_crcc_w_b_w(*p, crc);
+		p += 1;
+	}
+	if (!PointerIsAligned(p, uint32) && pend - p >= 2)
+	{
+		crc = __builtin_loongarch_crcc_w_h_w(*(const uint16 *) p, crc);
+		p += 2;
+	}
+	if (!PointerIsAligned(p, uint64) && pend - p >= 4)
+	{
+		crc = __builtin_loongarch_crcc_w_w_w(*(const uint32 *) p, crc);
+		p += 4;
+	}
+
+	/* Process eight bytes at a time, as far as we can. */
+	while (pend - p >= 8)
+	{
+		crc = __builtin_loongarch_crcc_w_d_w(*(const uint64 *) p, crc);
+		p += 8;
+	}
+
+	/* Process remaining 0-7 bytes: at most one word, halfword and byte. */
+	if (pend - p >= 4)
+	{
+		crc = __builtin_loongarch_crcc_w_w_w(*(const uint32 *) p, crc);
+		p += 4;
+	}
+	if (pend - p >= 2)
+	{
+		crc = __builtin_loongarch_crcc_w_h_w(*(const uint16 *) p, crc);
+		p += 2;
+	}
+	if (p < pend)
+	{
+		crc = __builtin_loongarch_crcc_w_b_w(*p, crc);
+	}
+
+	return crc;
+}