Have the server properly terminate tar archives.
authorRobert Haas <rhaas@postgresql.org>
Tue, 9 Nov 2021 19:21:35 +0000 (14:21 -0500)
committerRobert Haas <rhaas@postgresql.org>
Tue, 9 Nov 2021 19:29:15 +0000 (14:29 -0500)
Earlier versions of PostgreSQL featured a version of pg_basebackup
that wanted to edit tar archives but was too dumb to parse them
properly. The server made things easier for the client by failing
to add the two blocks of zero bytes that ought to end a tar file,
leaving it up to the client to do that.

But since commit 23a1c6578c87fca0e361c4f5f9a07df5ae1f9858, we
don't need this hack any more, because pg_basebackup is now smarter
and can parse tar files even if they are properly terminated! So
change the server to always properly terminate the tar files. Older
versions of pg_basebackup can't talk to new servers anyway, so
there's no compatibility break.

On the pg_basebackup side, we see still need to add the terminating
zero bytes if we're talking to an older server, but not when the
server is v15+. Hopefully at some point we'll be able to remove
some of this compatibility cruft, but it seems best to hang on to
it for now.

In passing, add a file header comment to bbstreamer_tar.c, to make
it clearer what's going on here.

Discussion: http://postgr.es/m/CA+TgmoZbNzsWwM4BE5Jb_qHncY817DYZwGf+2-7hkMQ27ZwsMQ@mail.gmail.com

src/backend/replication/basebackup.c
src/bin/pg_basebackup/bbstreamer_tar.c
src/bin/pg_basebackup/pg_basebackup.c

index 38c82c461964efbf1aee8668cae8feba032e2dcd..92430439f53acc064a33dde195d9c78c7b76be80 100644 (file)
@@ -374,7 +374,16 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
                Assert(lnext(state.tablespaces, lc) == NULL);
            }
            else
+           {
+               /* Properly terminate the tarfile. */
+               StaticAssertStmt(TAR_BLOCK_SIZE <= 2 * BLCKSZ,
+                                "BLCKSZ too small for 2 tar blocks");
+               memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
+               bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
+
+               /* OK, that's the end of the archive. */
                bbsink_end_archive(sink);
+           }
        }
 
        basebackup_progress_wait_wal_archive(&state);
@@ -611,6 +620,13 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
            sendFileWithContent(sink, pathbuf, "", &manifest);
        }
 
+       /* Properly terminate the tar file. */
+       StaticAssertStmt(TAR_BLOCK_SIZE <= 2 * BLCKSZ,
+                        "BLCKSZ too small for 2 tar blocks");
+       memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
+       bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
+
+       /* OK, that's the end of the archive. */
        bbsink_end_archive(sink);
    }
 
index 5fded0f4e6f3d1ef35c3040360036d83b7cf22b7..e6bd3ef52ed7d91f6e7a8e2d90208189da2a599b 100644 (file)
@@ -2,6 +2,16 @@
  *
  * bbstreamer_tar.c
  *
+ * This module implements three types of tar processing. A tar parser
+ * expects unlabelled chunks of data (e.g. BBSTREAMER_UNKNOWN) and splits
+ * it into labelled chunks (any other value of bbstreamer_archive_context).
+ * A tar archiver does the reverse: it takes a bunch of labelled chunks
+ * and produces a tarfile, optionally replacing member headers and trailers
+ * so that upstream bbstreamer objects can perform surgery on the tarfile
+ * contents without knowing the details of the tar format. A tar terminator
+ * just adds two blocks of NUL bytes to the end of the file, since older
+ * server versions produce files with this terminator omitted.
+ *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
index 30efc03b830c47f8d7f91a38c1daaa24dd076c2d..1739ac638231c316521a53cf80e9c7855db7916d 100644 (file)
@@ -85,6 +85,12 @@ typedef void (*WriteDataCallback) (size_t nbytes, char *buf,
  */
 #define MINIMUM_VERSION_FOR_MANIFESTS  130000
 
+/*
+ * Before v15, tar files received from the server will be improperly
+ * terminated.
+ */
+#define MINIMUM_VERSION_FOR_TERMINATED_TARFILE 150000
+
 /*
  * Different ways to include WAL
  */
@@ -166,7 +172,8 @@ static void progress_report(int tablespacenum, bool force, bool finished);
 
 static bbstreamer *CreateBackupStreamer(char *archive_name, char *spclocation,
                                        bbstreamer **manifest_inject_streamer_p,
-                                       bool is_recovery_guc_supported);
+                                       bool is_recovery_guc_supported,
+                                       bool expect_unterminated_tarfile);
 static void ReceiveTarFile(PGconn *conn, char *archive_name, char *spclocation,
                           bool tablespacenum);
 static void ReceiveTarCopyChunk(size_t r, char *copybuf, void *callback_data);
@@ -965,7 +972,8 @@ ReceiveCopyData(PGconn *conn, WriteDataCallback callback,
 static bbstreamer *
 CreateBackupStreamer(char *archive_name, char *spclocation,
                     bbstreamer **manifest_inject_streamer_p,
-                    bool is_recovery_guc_supported)
+                    bool is_recovery_guc_supported,
+                    bool expect_unterminated_tarfile)
 {
    bbstreamer *streamer;
    bbstreamer *manifest_inject_streamer = NULL;
@@ -1074,12 +1082,13 @@ CreateBackupStreamer(char *archive_name, char *spclocation,
    /*
     * If we're doing anything that involves understanding the contents of
     * the archive, we'll need to parse it. If not, we can skip parsing it,
-    * but the tar files the server sends are not properly terminated, so
-    * we'll need to add the terminator here.
+    * but old versions of the server send improperly terminated tarfiles,
+    * so if we're talking to such a server we'll need to add the terminator
+    * here.
     */
    if (must_parse_archive)
        streamer = bbstreamer_tar_parser_new(streamer);
-   else
+   else if (expect_unterminated_tarfile)
        streamer = bbstreamer_tar_terminator_new(streamer);
 
    /* Return the results. */
@@ -1099,14 +1108,18 @@ ReceiveTarFile(PGconn *conn, char *archive_name, char *spclocation,
    WriteTarState state;
    bbstreamer *manifest_inject_streamer;
    bool        is_recovery_guc_supported;
+   bool        expect_unterminated_tarfile;
 
    /* Pass all COPY data through to the backup streamer. */
    memset(&state, 0, sizeof(state));
    is_recovery_guc_supported =
        PQserverVersion(conn) >= MINIMUM_VERSION_FOR_RECOVERY_GUC;
+   expect_unterminated_tarfile =
+       PQserverVersion(conn) < MINIMUM_VERSION_FOR_TERMINATED_TARFILE;
    state.streamer = CreateBackupStreamer(archive_name, spclocation,
                                          &manifest_inject_streamer,
-                                         is_recovery_guc_supported);
+                                         is_recovery_guc_supported,
+                                         expect_unterminated_tarfile);
    state.tablespacenum = tablespacenum;
    ReceiveCopyData(conn, ReceiveTarCopyChunk, &state);
    progress_filename = NULL;