Add support for zstd with compression of full-page writes in WAL
author Michael Paquier <michael@paquier.xyz>
Fri, 11 Mar 2022 03:18:53 +0000 (12:18 +0900)
committer Michael Paquier <michael@paquier.xyz>
Fri, 11 Mar 2022 03:18:53 +0000 (12:18 +0900)
wal_compression gains a new value, "zstd", to allow the compression of
full-page images using the compression method of the same name.

Compression is done using the default level recommended by the library,
that is ZSTD_CLEVEL_DEFAULT = 3.  Some benchmarking has shown that it
could make sense to use a lower level for the FPI compression, like 1 or
2, as the compression rate did not change much with a bit less CPU
consumed, but any tests done would only cover a few scenarios so it is
hard to come to a clear conclusion.  Anyway, there is no reason not to
use the default level, which is the level recommended by the library,
so it should be fine for most cases.

zstd easily outclasses pglz, and is better than LZ4 where one wants to
have more compression at the cost of extra CPU, but both are good enough
in their own scenarios, so the choice between one or the other of these
mainly comes down to a study of the workload patterns and the schema
involved.

This commit relies heavily on 4035cd5, which reshaped the code creating
and restoring full-page writes to be aware of the compression type,
making this integration straightforward.

This patch borrows some early work from Andrey Borodin, though the patch
got a complete rewrite.

Author: Justin Pryzby
Discussion: https://postgr.es/m/20220222231948.GJ9008@telsasoft.com

doc/src/sgml/config.sgml
doc/src/sgml/installation.sgml
src/backend/access/transam/xloginsert.c
src/backend/access/transam/xlogreader.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/bin/pg_waldump/pg_waldump.c
src/include/access/xlog.h
src/include/access/xlogrecord.h

index 7ed8c82a9dd79f4938be912670470518860a604c..5612e804533ae84c076f513d8f54440a5a383160 100644 (file)
@@ -3154,10 +3154,13 @@ include_dir 'conf.d'
         server compresses full page images written to WAL when
         <xref linkend="guc-full-page-writes"/> is on or during a base backup.
         A compressed page image will be decompressed during WAL replay.
-        The supported methods are <literal>pglz</literal> and
-        <literal>lz4</literal> (if <productname>PostgreSQL</productname> was
-        compiled with <option>--with-lz4</option>). The default value is
-        <literal>off</literal>. Only superusers can change this setting.
+        The supported methods are <literal>pglz</literal>,
+        <literal>lz4</literal> (if <productname>PostgreSQL</productname>
+        was compiled with <option>--with-lz4</option>) and
+        <literal>zstd</literal> (if <productname>PostgreSQL</productname>
+        was compiled with <option>--with-zstd</option>).
+        The default value is <literal>off</literal>.
+        Only superusers can change this setting.
        </para>
 
        <para>
index 0f7425259080054a10e8a5b0c8292f88d0169be3..a239bbef2f5a7d3a0c24dc05216bf5db8e98eda9 100644 (file)
@@ -271,6 +271,14 @@ su - postgres
      </para>
     </listitem>
 
+    <listitem>
+     <para>
+      You need <productname>zstd</productname>, if you want to support
+      compression of data with this method; see
+      <xref linkend="guc-wal-compression"/>.
+     </para>
+    </listitem>
+
     <listitem>
      <para>
       To build the <productname>PostgreSQL</productname> documentation,
index 83d40b55e6157c4ab5d48068fa469581a9f6bcb1..f4eb54b63c43fafe51f34e65f4e2a49b3f85c60f 100644 (file)
 #include <lz4.h>
 #endif
 
+#ifdef USE_ZSTD
+#include <zstd.h>
+#endif
+
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "access/xlog_internal.h"
 #define LZ4_MAX_BLCKSZ     0
 #endif
 
+#ifdef USE_ZSTD
+#define ZSTD_MAX_BLCKSZ        ZSTD_COMPRESSBOUND(BLCKSZ)
+#else
+#define ZSTD_MAX_BLCKSZ        0
+#endif
+
 #define PGLZ_MAX_BLCKSZ        PGLZ_MAX_OUTPUT(BLCKSZ)
 
-#define COMPRESS_BUFSIZE   Max(PGLZ_MAX_BLCKSZ, LZ4_MAX_BLCKSZ)
+/* Buffer size required to store a compressed version of backup block image */
+#define COMPRESS_BUFSIZE   Max(Max(PGLZ_MAX_BLCKSZ, LZ4_MAX_BLCKSZ), ZSTD_MAX_BLCKSZ)
 
 /*
  * For each block reference registered with XLogRegisterBuffer, we fill in
@@ -698,6 +709,14 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 #endif
                        break;
 
+                   case WAL_COMPRESSION_ZSTD:
+#ifdef USE_ZSTD
+                       bimg.bimg_info |= BKPIMAGE_COMPRESS_ZSTD;
+#else
+                       elog(ERROR, "zstd is not supported by this build");
+#endif
+                       break;
+
                    case WAL_COMPRESSION_NONE:
                        Assert(false);  /* cannot happen */
                        break;
@@ -906,6 +925,17 @@ XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
 #endif
            break;
 
+       case WAL_COMPRESSION_ZSTD:
+#ifdef USE_ZSTD
+           len = ZSTD_compress(dest, COMPRESS_BUFSIZE, source, orig_len,
+                               ZSTD_CLEVEL_DEFAULT);
+           if (ZSTD_isError(len))
+               len = -1;       /* failure */
+#else
+           elog(ERROR, "zstd is not supported by this build");
+#endif
+           break;
+
        case WAL_COMPRESSION_NONE:
            Assert(false);      /* cannot happen */
            break;
index 35029cf97d62517576fabc9a77cca8408363f3cf..b7c06da2557797938096ba2a4338a79445c36412 100644 (file)
@@ -21,6 +21,9 @@
 #ifdef USE_LZ4
 #include <lz4.h>
 #endif
+#ifdef USE_ZSTD
+#include <zstd.h>
+#endif
 
 #include "access/transam.h"
 #include "access/xlog_internal.h"
@@ -1618,6 +1621,23 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
                                  "LZ4",
                                  block_id);
            return false;
+#endif
+       }
+       else if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
+       {
+#ifdef USE_ZSTD
+           size_t      decomp_result = ZSTD_decompress(tmp.data,
+                                                       BLCKSZ - bkpb->hole_length,
+                                                       ptr, bkpb->bimg_len);
+
+           if (ZSTD_isError(decomp_result))
+               decomp_success = false;
+#else
+           report_invalid_record(record, "image at %X/%X compressed with %s not supported by build, block %d",
+                                 LSN_FORMAT_ARGS(record->ReadRecPtr),
+                                 "zstd",
+                                 block_id);
+           return false;
 #endif
        }
        else
index 6d11f9c71b9997acc89b2e6bd88cfdc8ba95afd1..e7f0a380e60185a4c825db88c9aa55eacdd723f8 100644 (file)
@@ -550,6 +550,9 @@ static const struct config_enum_entry wal_compression_options[] = {
    {"pglz", WAL_COMPRESSION_PGLZ, false},
 #ifdef USE_LZ4
    {"lz4", WAL_COMPRESSION_LZ4, false},
+#endif
+#ifdef USE_ZSTD
+   {"zstd", WAL_COMPRESSION_ZSTD, false},
 #endif
    {"on", WAL_COMPRESSION_PGLZ, false},
    {"off", WAL_COMPRESSION_NONE, false},
index 4a094bb38be5415239165cf42fff7238eeedad23..4cf5b26a3638b1454ea5eec65d7769ab33c2568a 100644 (file)
 #wal_log_hints = off           # also do full page writes of non-critical updates
                    # (change requires restart)
 #wal_compression = off         # enables compression of full-page writes;
-                   # off, pglz, lz4, or on
+                   # off, pglz, lz4, zstd, or on
 #wal_init_zero = on            # zero-fill new WAL files
 #wal_recycle = on          # recycle WAL files
 #wal_buffers = -1          # min 32kB, -1 sets based on shared_buffers
index 2340dc247b0edaa45655cee0fc22b1b22684c67f..f128050b4eae53d5d4cf71f28f97cb2ddfe77d47 100644 (file)
@@ -562,6 +562,8 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
                        method = "pglz";
                    else if ((bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
                        method = "lz4";
+                   else if ((bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
+                       method = "zstd";
                    else
                        method = "unknown";
 
index 4b45ac64db87c17569608cd3187cb9a2512a5f87..09f6464331bcd4897042736ab09e954a2db248c1 100644 (file)
@@ -75,7 +75,8 @@ typedef enum WalCompression
 {
    WAL_COMPRESSION_NONE = 0,
    WAL_COMPRESSION_PGLZ,
-   WAL_COMPRESSION_LZ4
+   WAL_COMPRESSION_LZ4,
+   WAL_COMPRESSION_ZSTD
 } WalCompression;
 
 /* Recovery states */
index c1b1137aa7adc7de83b19f9363e1422de8a87b37..052ac6817a6020d1979d54a76007165f4320a968 100644 (file)
@@ -149,8 +149,11 @@ typedef struct XLogRecordBlockImageHeader
 /* compression methods supported */
 #define BKPIMAGE_COMPRESS_PGLZ 0x04
 #define BKPIMAGE_COMPRESS_LZ4  0x08
+#define BKPIMAGE_COMPRESS_ZSTD 0x10
+
 #define    BKPIMAGE_COMPRESSED(info) \
-   ((info & (BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4)) != 0)
+   ((info & (BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | \
+             BKPIMAGE_COMPRESS_ZSTD)) != 0)
 
 /*
  * Extra header information used when page image has "hole" and