Extend the options of pg_basebackup to control compression
author: Michael Paquier <michael@paquier.xyz>
Fri, 21 Jan 2022 02:08:43 +0000 (11:08 +0900)
committer: Michael Paquier <michael@paquier.xyz>
Fri, 21 Jan 2022 02:08:43 +0000 (11:08 +0900)
The option --compress is extended to accept a compression method and an
optional compression level, following the grammar METHOD[:LEVEL].  The
methods currently supported are "none" and "gzip", for client-side
compression.  Each of those methods uses only an integer value for the
compression level, but any method implemented in the future could use
more specific keywords if necessary.

This commit keeps the logic backward-compatible.  Hence, the following
compatibility rules apply for the new format of the option --compress:
* -z/--gzip is a synonym of --compress=gzip.
* --compress=NUM implies:
** --compress=none if NUM = 0.
** --compress=gzip:NUM if NUM > 0.

Note that there are also plans to extend this grammar further to cover
server-side compression.

Reviewed-by: Robert Haas, Magnus Hagander, Álvaro Herrera, David
G. Johnston, Georgios Kokolatos
Discussion: https://postgr.es/m/Yb3GEgWwcu4wZDuA@paquier.xyz

doc/src/sgml/ref/pg_basebackup.sgml
src/bin/pg_basebackup/pg_basebackup.c
src/bin/pg_basebackup/t/010_pg_basebackup.pl

index 8422cd43043a86cf387b4b7b34e111af6caa80e8..47d11289beec40c7981ac2eaff83a4d495e8f98e 100644 (file)
@@ -398,15 +398,24 @@ PostgreSQL documentation
 
      <varlistentry>
       <term><option>-Z <replaceable class="parameter">level</replaceable></option></term>
+      <term><option>-Z <replaceable class="parameter">method</replaceable></option>[:<replaceable>level</replaceable>]</term>
       <term><option>--compress=<replaceable class="parameter">level</replaceable></option></term>
+      <term><option>--compress=<replaceable class="parameter">method</replaceable></option>[:<replaceable>level</replaceable>]</term>
       <listitem>
        <para>
-        Enables gzip compression of tar file output, and specifies the
+        Enables compression of tar file output, and specifies the
         compression level (0 through 9, 0 being no compression and 9 being best
         compression). Compression is only available when using the tar
         format, and the suffix <filename>.gz</filename> will
         automatically be added to all tar filenames.
        </para>
+       <para>
+        The compression method can be set to either <literal>gzip</literal>
+        for compression with <application>gzip</application>, or
+        <literal>none</literal> for no compression. A compression level
+        can be optionally specified, by appending the level number after a
+        colon (<literal>:</literal>).
+       </para>
       </listitem>
      </varlistentry>
     </variablelist>
@@ -942,6 +951,16 @@ PostgreSQL documentation
 <screen>
 <prompt>$</prompt> <userinput>pg_basebackup -D backup/data -T /opt/ts=$(pwd)/backup/ts</userinput>
 </screen></para>
+
+  <para>
+   To create a backup of a local server with one tar file for each tablespace
+   compressed with <application>gzip</application> at level 9, stored in the
+   directory <filename>backup</filename>:
+<screen>
+<prompt>$</prompt> <userinput>pg_basebackup -D backup -Ft --compress=gzip:9</userinput>
+</screen>
+  </para>
+
  </refsect1>
 
  <refsect1>
index ec3b4f3c174a488b3564bf7c3fe9d340be31604a..d5b0ade10d5e661b301c931af0835f911e54f0fa 100644 (file)
@@ -123,6 +123,7 @@ static bool showprogress = false;
 static bool estimatesize = true;
 static int verbose = 0;
 static int compresslevel = 0;
+static WalCompressionMethod compressmethod = COMPRESSION_NONE;
 static IncludeWal includewal = STREAM_WAL;
 static bool fastcheckpoint = false;
 static bool writerecoveryconf = false;
@@ -379,7 +380,8 @@ usage(void)
    printf(_("  -X, --wal-method=none|fetch|stream\n"
             "                         include required WAL files with specified method\n"));
    printf(_("  -z, --gzip             compress tar output\n"));
-   printf(_("  -Z, --compress=0-9     compress tar output with given compression level\n"));
+   printf(_("  -Z, --compress={gzip,none}[:LEVEL] or [LEVEL]\n"
+            "                         compress tar output with given compression method or level\n"));
    printf(_("\nGeneral options:\n"));
    printf(_("  -c, --checkpoint=fast|spread\n"
             "                         set fast or spread checkpointing\n"));
@@ -544,8 +546,7 @@ LogStreamerMain(logstreamer_param *param)
                                                    stream.do_sync);
    else
        stream.walmethod = CreateWalTarMethod(param->xlog,
-                                             (compresslevel != 0) ?
-                                             COMPRESSION_GZIP : COMPRESSION_NONE,
+                                             compressmethod,
                                              compresslevel,
                                              stream.do_sync);
 
@@ -936,6 +937,81 @@ parse_max_rate(char *src)
    return (int32) result;
 }
 
+/*
+ * Utility wrapper to parse the values specified for -Z/--compress.
+ * *methodres and *levelres will be optionally filled with values coming
+ * from the parsed results.
+ */
+static void
+parse_compress_options(char *src, WalCompressionMethod *methodres,
+                      int *levelres)
+{
+   char       *sep;
+   int         firstlen;
+   char       *firstpart = NULL;
+
+   /* check if the option is split in two */
+   sep = strchr(src, ':');
+
+   /*
+    * The first part of the option value could be a method name, or just a
+    * level value.
+    */
+   firstlen = (sep != NULL) ? (sep - src) : strlen(src);
+   firstpart = pg_malloc(firstlen + 1);
+   strncpy(firstpart, src, firstlen);
+   firstpart[firstlen] = '\0';
+
+   /*
+    * Check if the first part of the string matches with a supported
+    * compression method.
+    */
+   if (pg_strcasecmp(firstpart, "gzip") == 0)
+       *methodres = COMPRESSION_GZIP;
+   else if (pg_strcasecmp(firstpart, "none") == 0)
+       *methodres = COMPRESSION_NONE;
+   else
+   {
+       /*
+        * It does not match anything known, so check for the
+        * backward-compatible case of only an integer where the implied
+        * compression method changes depending on the level value.
+        */
+       if (!option_parse_int(firstpart, "-Z/--compress", 0,
+                             INT_MAX, levelres))
+           exit(1);
+
+       *methodres = (*levelres > 0) ?
+           COMPRESSION_GZIP : COMPRESSION_NONE;
+       return;
+   }
+
+   if (sep == NULL)
+   {
+       /*
+        * The caller specified a method without a colon separator, so let any
+        * subsequent checks assign a default level.
+        */
+       return;
+   }
+
+   /* Check the contents after the colon separator. */
+   sep++;
+   if (*sep == '\0')
+   {
+       pg_log_error("no compression level defined for method %s", firstpart);
+       exit(1);
+   }
+
+   /*
+    * For any of the methods currently supported, the data after the
+    * separator can just be an integer.
+    */
+   if (!option_parse_int(sep, "-Z/--compress", 0, INT_MAX,
+                         levelres))
+       exit(1);
+}
+
 /*
  * Read a stream of COPY data and invoke the provided callback for each
  * chunk.
@@ -996,7 +1072,7 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
                     bool is_recovery_guc_supported,
                     bool expect_unterminated_tarfile)
 {
-   bbstreamer *streamer;
+   bbstreamer *streamer = NULL;
    bbstreamer *manifest_inject_streamer = NULL;
    bool        inject_manifest;
    bool        must_parse_archive;
@@ -1055,19 +1131,22 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
            archive_file = NULL;
        }
 
+       if (compressmethod == COMPRESSION_NONE)
+           streamer = bbstreamer_plain_writer_new(archive_filename,
+                                                  archive_file);
 #ifdef HAVE_LIBZ
-       if (compresslevel != 0)
+       else if (compressmethod == COMPRESSION_GZIP)
        {
            strlcat(archive_filename, ".gz", sizeof(archive_filename));
            streamer = bbstreamer_gzip_writer_new(archive_filename,
                                                  archive_file,
                                                  compresslevel);
        }
-       else
 #endif
-           streamer = bbstreamer_plain_writer_new(archive_filename,
-                                                  archive_file);
-
+       else
+       {
+           Assert(false);      /* not reachable */
+       }
 
        /*
         * If we need to parse the archive for whatever reason, then we'll
@@ -2279,11 +2358,11 @@ main(int argc, char **argv)
 #else
                compresslevel = 1;  /* will be rejected below */
 #endif
+               compressmethod = COMPRESSION_GZIP;
                break;
            case 'Z':
-               if (!option_parse_int(optarg, "-Z/--compress", 0, 9,
-                                     &compresslevel))
-                   exit(1);
+               parse_compress_options(optarg, &compressmethod,
+                                      &compresslevel);
                break;
            case 'c':
                if (pg_strcasecmp(optarg, "fast") == 0)
@@ -2412,7 +2491,7 @@ main(int argc, char **argv)
    /*
     * Compression doesn't make sense unless tar format is in use.
     */
-   if (format == 'p' && compresslevel != 0)
+   if (format == 'p' && compressmethod != COMPRESSION_NONE)
    {
        if (backup_target == NULL)
            pg_log_error("only tar mode backups can be compressed");
@@ -2516,14 +2595,43 @@ main(int argc, char **argv)
        }
    }
 
-#ifndef HAVE_LIBZ
-   /* Sanity checks for compression level. */
-   if (compresslevel != 0)
+   /* Sanity checks for compression-related options. */
+   switch (compressmethod)
    {
-       pg_log_error("this build does not support compression");
-       exit(1);
-   }
+       case COMPRESSION_NONE:
+           if (compresslevel != 0)
+           {
+               pg_log_error("cannot use compression level with method %s",
+                            "none");
+               fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+                       progname);
+               exit(1);
+           }
+           break;
+       case COMPRESSION_GZIP:
+#ifdef HAVE_LIBZ
+           if (compresslevel == 0)
+           {
+               pg_log_info("no value specified for compression level, switching to default");
+               compresslevel = Z_DEFAULT_COMPRESSION;
+           }
+           if (compresslevel > 9)
+           {
+               pg_log_error("compression level %d of method %s higher than maximum of 9",
+                            compresslevel, "gzip");
+               exit(1);
+           }
+#else
+           pg_log_error("this build does not support compression with %s",
+                        "gzip");
+           exit(1);
 #endif
+           break;
+       case COMPRESSION_LZ4:
+           /* option not supported */
+           Assert(false);
+           break;
+   }
 
    /*
     * Sanity checks for progress reporting options.
index f7e21941ebf78357d4b1fbf9c81aa2cba9eda00a..95a6bd6778f0ccec64672360b9c4a2f0eb5b2dd8 100644 (file)
@@ -10,7 +10,7 @@ use File::Path qw(rmtree);
 use Fcntl qw(:seek);
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
-use Test::More tests => 135;
+use Test::More tests => 143;
 
 program_help_ok('pg_basebackup');
 program_version_ok('pg_basebackup');
@@ -38,6 +38,20 @@ my $pgdata = $node->data_dir;
 $node->command_fails(['pg_basebackup'],
    'pg_basebackup needs target directory specified');
 
+# Sanity checks for options
+$node->command_fails_like(
+   [ 'pg_basebackup', '-D', "$tempdir/backup", '--compress', 'none:1' ],
+   qr/\Qpg_basebackup: error: cannot use compression level with method none/,
+   'failure if method "none" specified with compression level');
+$node->command_fails_like(
+   [ 'pg_basebackup', '-D', "$tempdir/backup", '--compress', 'none+' ],
+   qr/\Qpg_basebackup: error: invalid value "none+" for option/,
+   'failure on incorrect separator to define compression level');
+$node->command_fails_like(
+   [ 'pg_basebackup', '-D', "$tempdir/backup", '--compress', 'none:' ],
+   qr/\Qpg_basebackup: error: no compression level defined for method none/,
+   'failure on missing compression level value');
+
 # Some Windows ANSI code pages may reject this filename, in which case we
 # quietly proceed without this bit of test coverage.
 if (open my $badchars, '>>', "$tempdir/pgdata/FOO\xe0\xe0\xe0BAR")
@@ -699,7 +713,7 @@ note "Testing pg_basebackup with compression methods";
 # Check ZLIB compression if available.
 SKIP:
 {
-   skip "postgres was not built with ZLIB support", 5
+   skip "postgres was not built with ZLIB support", 7
      if (!check_pg_config("#define HAVE_LIBZ 1"));
 
    $node->command_ok(
@@ -717,15 +731,28 @@ SKIP:
            '--format',              't'
        ],
        'pg_basebackup with --gzip');
+   $node->command_ok(
+       [
+           @pg_basebackup_defs,     '-D',
+           "$tempdir/backup_gzip3", '--compress',
+           'gzip:1',                '--format',
+           't'
+       ],
+       'pg_basebackup with --compress=gzip:1');
 
    # Verify that the stored files are generated with their expected
    # names.
    my @zlib_files = glob "$tempdir/backup_gzip/*.tar.gz";
    is(scalar(@zlib_files), 2,
-       "two files created with --compress (base.tar.gz and pg_wal.tar.gz)");
+       "two files created with --compress=NUM (base.tar.gz and pg_wal.tar.gz)"
+   );
    my @zlib_files2 = glob "$tempdir/backup_gzip2/*.tar.gz";
    is(scalar(@zlib_files2), 2,
        "two files created with --gzip (base.tar.gz and pg_wal.tar.gz)");
+   my @zlib_files3 = glob "$tempdir/backup_gzip3/*.tar.gz";
+   is(scalar(@zlib_files3), 2,
+       "two files created with --compress=gzip:NUM (base.tar.gz and pg_wal.tar.gz)"
+   );
 
    # Check the integrity of the files generated.
    my $gzip = $ENV{GZIP_PROGRAM};
@@ -735,8 +762,9 @@ SKIP:
        || system_log($gzip, '--version') != 0);
 
    my $gzip_is_valid =
-     system_log($gzip, '--test', @zlib_files, @zlib_files2);
+     system_log($gzip, '--test', @zlib_files, @zlib_files2, @zlib_files3);
    is($gzip_is_valid, 0, "gzip verified the integrity of compressed data");
    rmtree("$tempdir/backup_gzip");
    rmtree("$tempdir/backup_gzip2");
+   rmtree("$tempdir/backup_gzip3");
 }