Tolerate timeline switches while "pg_basebackup -X fetch" is running.
author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 3 Jan 2013 17:51:00 +0000 (19:51 +0200)
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 3 Jan 2013 17:51:00 +0000 (19:51 +0200)
If you take a base backup from a standby server with "pg_basebackup -X
fetch", and the timeline switches while the backup is being taken, the
backup used to fail with an error "requested WAL segment %s has already
been removed". This is because the server-side code that sends over the
required WAL files would not construct the WAL filename with the correct
timeline after a switch.

Fix that by using readdir() to scan pg_xlog for all the WAL segments in the
range, regardless of timeline.

Also, include all timeline history files in the backup, if taken with
"-X fetch". That fixes another related bug: If a timeline switch happened
just before the backup was initiated in a standby, the WAL segment
containing the initial checkpoint record contains WAL from the older
timeline too. Recovery will not accept that without a timeline history file
that lists the older timeline.

Backpatch to 9.2. Versions prior to that were not affected as you could not
take a base backup from a standby before 9.2.

src/backend/access/transam/xlog.c
src/backend/replication/basebackup.c
src/backend/replication/walsender.c
src/include/access/xlog.h

index 58e139dcf5cf9af363ebd21087b020cb1bc1d41a..51a515a5552925a1acefac28aca0f5d1df523990 100644 (file)
@@ -2797,18 +2797,33 @@ PreallocXlogFiles(XLogRecPtr endptr)
 }
 
 /*
- * Get the segno of the latest removed or recycled WAL segment.
- * Returns 0/0 if no WAL segments have been removed since startup.
+ * Throws an error if the given log segment has already been removed or
+ * recycled. The caller should only pass a segment that it knows to have
+ * existed while the server has been running, as this function always
+ * succeeds if no WAL segments have been removed since startup.
+ * 'tli' is only used in the error message.
  */
 void
-XLogGetLastRemoved(XLogSegNo *segno)
+CheckXLogRemoved(XLogSegNo segno, TimeLineID tli)
 {
    /* use volatile pointer to prevent code rearrangement */
    volatile XLogCtlData *xlogctl = XLogCtl;
+   XLogSegNo   lastRemovedSegNo;
 
    SpinLockAcquire(&xlogctl->info_lck);
-   *segno = xlogctl->lastRemovedSegNo;
+   lastRemovedSegNo = xlogctl->lastRemovedSegNo;
    SpinLockRelease(&xlogctl->info_lck);
+
+   if (segno <= lastRemovedSegNo)
+   {
+       char        filename[MAXFNAMELEN];
+
+       XLogFileName(filename, tli, segno);
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("requested WAL segment %s has already been removed",
+                       filename)));
+   }
 }
 
 /*
index 0e50ed169ebc1028c00cc75f8ff97c2d51896d43..2330fcc23ad8e8c6130eba63495a27747135c4fb 100644 (file)
@@ -56,14 +56,13 @@ static void base_backup_cleanup(int code, Datum arg);
 static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir);
 static void parse_basebackup_options(List *options, basebackup_options *opt);
 static void SendXlogRecPtrResult(XLogRecPtr ptr);
+static int compareWalFileNames(const void *a, const void *b);
 
 /* Was the backup currently in-progress initiated in recovery mode? */
 static bool backup_started_in_recovery = false;
 
 /*
  * Size of each block sent into the tar stream for larger files.
- *
- * XLogSegSize *MUST* be evenly dividable by this
  */
 #define TAR_SEND_SIZE 32768
 
@@ -227,64 +226,201 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
         * We've left the last tar file "open", so we can now append the
         * required WAL files to it.
         */
-       XLogSegNo   logsegno;
-       XLogSegNo   endlogsegno;
+       char        pathbuf[MAXPGPATH];
+       XLogSegNo   segno;
+       XLogSegNo   startsegno;
+       XLogSegNo   endsegno;
        struct stat statbuf;
+       List       *historyFileList = NIL;
+       List       *walFileList = NIL;
+       char      **walFiles;
+       int         nWalFiles;
+       char        firstoff[MAXFNAMELEN];
+       char        lastoff[MAXFNAMELEN];
+       DIR        *dir;
+       struct dirent *de;
+       int         i;
+       ListCell   *lc;
+       TimeLineID  tli;
 
-       MemSet(&statbuf, 0, sizeof(statbuf));
-       statbuf.st_mode = S_IRUSR | S_IWUSR;
-#ifndef WIN32
-       statbuf.st_uid = geteuid();
-       statbuf.st_gid = getegid();
-#endif
-       statbuf.st_size = XLogSegSize;
-       statbuf.st_mtime = time(NULL);
+       /*
+        * I'd rather not worry about timelines here, so scan pg_xlog and
+        * include all WAL files in the range between 'startptr' and 'endptr',
+        * regardless of the timeline the file is stamped with. If there are
+        * some spurious WAL files belonging to timelines that don't belong
+        * in this server's history, they will be included too. Normally there
+        * shouldn't be such files, but if there are, there's little harm in
+        * including them.
+        */
+       XLByteToSeg(startptr, startsegno);
+       XLogFileName(firstoff, ThisTimeLineID, startsegno);
+       XLByteToPrevSeg(endptr, endsegno);
+       XLogFileName(lastoff, ThisTimeLineID, endsegno);
+
+       dir = AllocateDir("pg_xlog");
+       if (!dir)
+           ereport(ERROR,
+                   (errmsg("could not open directory \"%s\": %m", "pg_xlog")));
+       while ((de = ReadDir(dir, "pg_xlog")) != NULL)
+       {
+           /* Does it look like a WAL segment, and is it in the range? */
+           if (strlen(de->d_name) == 24 &&
+               strspn(de->d_name, "0123456789ABCDEF") == 24 &&
+               strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
+               strcmp(de->d_name + 8, lastoff + 8) <= 0)
+           {
+               walFileList = lappend(walFileList, pstrdup(de->d_name));
+           }
+           /* Does it look like a timeline history file? */
+           else if (strlen(de->d_name) == 8 + strlen(".history") &&
+                    strspn(de->d_name, "0123456789ABCDEF") == 8 &&
+                    strcmp(de->d_name + 8, ".history") == 0)
+           {
+               historyFileList = lappend(historyFileList, pstrdup(de->d_name));
+           }
+       }
+       FreeDir(dir);
 
-       XLByteToSeg(startptr, logsegno);
-       XLByteToPrevSeg(endptr, endlogsegno);
+       /*
+        * Before we go any further, check that none of the WAL segments we
+        * need were removed.
+        */
+       CheckXLogRemoved(startsegno, ThisTimeLineID);
 
-       while (true)
+       /*
+        * Put the WAL filenames into an array, and sort. We send the files
+        * in order from oldest to newest, to reduce the chance that a file
+        * is recycled before we get a chance to send it over.
+        */
+       nWalFiles = list_length(walFileList);
+       walFiles = palloc(nWalFiles * sizeof(char *));
+       i = 0;
+       foreach(lc, walFileList)
        {
-           /* Send another xlog segment */
-           char        fn[MAXPGPATH];
-           int         i;
+           walFiles[i++] = lfirst(lc);
+       }
+       qsort(walFiles, nWalFiles, sizeof(char *), compareWalFileNames);
 
-           XLogFilePath(fn, ThisTimeLineID, logsegno);
-           _tarWriteHeader(fn, NULL, &statbuf);
+       /*
+        * Sanity check: the first and last segment should cover startptr and
+        * endptr, with no gaps in between.
+        */
+       XLogFromFileName(walFiles[0], &tli, &segno);
+       if (segno != startsegno)
+       {
+           char startfname[MAXFNAMELEN];
+           XLogFileName(startfname, ThisTimeLineID, startsegno);
+           ereport(ERROR,
+                   (errmsg("could not find WAL file %s", startfname)));
+       }
+       for (i = 0; i < nWalFiles; i++)
+       {
+           XLogSegNo currsegno = segno;
+           XLogSegNo nextsegno = segno + 1;
 
-           /* Send the actual WAL file contents, block-by-block */
-           for (i = 0; i < XLogSegSize / TAR_SEND_SIZE; i++)
+           XLogFromFileName(walFiles[i], &tli, &segno);
+           if (!(nextsegno == segno || currsegno == segno))
            {
-               char        buf[TAR_SEND_SIZE];
-               XLogRecPtr  ptr;
+               char nextfname[MAXFNAMELEN];
+               XLogFileName(nextfname, ThisTimeLineID, nextsegno);
+               ereport(ERROR,
+                       (errmsg("could not find WAL file %s", nextfname)));
+           }
+       }
+       if (segno != endsegno)
+       {
+           char endfname[MAXFNAMELEN];
+           XLogFileName(endfname, ThisTimeLineID, endsegno);
+           ereport(ERROR,
+                   (errmsg("could not find WAL file %s", endfname)));
+       }
+
+       /* Ok, we have everything we need. Send the WAL files. */
+       for (i = 0; i < nWalFiles; i++)
+       {
+           FILE       *fp;
+           char        buf[TAR_SEND_SIZE];
+           size_t      cnt;
+           pgoff_t     len = 0;
 
-               XLogSegNoOffsetToRecPtr(logsegno, TAR_SEND_SIZE * i, ptr);
+           snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFiles[i]);
+           XLogFromFileName(walFiles[i], &tli, &segno);
 
+           fp = AllocateFile(pathbuf, "rb");
+           if (fp == NULL)
+           {
                /*
-                * Some old compilers, e.g. gcc 2.95.3/x86, think that passing
-                * a struct in the same function as a longjump might clobber a
-                * variable.  bjm 2011-02-04
-                * http://lists.apple.com/archives/xcode-users/2003/Dec//msg000
-                * 51.html
+                * Most likely reason for this is that the file was already
+                * removed by a checkpoint, so check for that to get a better
+                * error message.
                 */
-               XLogRead(buf, ThisTimeLineID, ptr, TAR_SEND_SIZE);
-               if (pq_putmessage('d', buf, TAR_SEND_SIZE))
+               CheckXLogRemoved(segno, tli);
+
+               ereport(ERROR,
+                       (errcode_for_file_access(),
+                        errmsg("could not open file \"%s\": %m", pathbuf)));
+           }
+
+           if (fstat(fileno(fp), &statbuf) != 0)
+               ereport(ERROR,
+                       (errcode_for_file_access(),
+                        errmsg("could not stat file \"%s\": %m",
+                               pathbuf)));
+           if (statbuf.st_size != XLogSegSize)
+           {
+               CheckXLogRemoved(segno, tli);
+               ereport(ERROR,
+                       (errcode_for_file_access(),
+                        errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
+           }
+
+           _tarWriteHeader(pathbuf, NULL, &statbuf);
+
+           while ((cnt = fread(buf, 1, Min(sizeof(buf), XLogSegSize - len), fp)) > 0)
+           {
+               CheckXLogRemoved(segno, tli);
+               /* Send the chunk as a CopyData message */
+               if (pq_putmessage('d', buf, cnt))
                    ereport(ERROR,
                            (errmsg("base backup could not send data, aborting backup")));
+
+               len += cnt;
+               if (len == XLogSegSize)
+                   break;
            }
 
-           /*
-            * Files are always fixed size, and always end on a 512 byte
-            * boundary, so padding is never necessary.
-            */
+           if (len != XLogSegSize)
+           {
+               CheckXLogRemoved(segno, tli);
+               ereport(ERROR,
+                       (errcode_for_file_access(),
+                        errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
+           }
 
+           /* XLogSegSize is a multiple of 512, so no need for padding */
+           FreeFile(fp);
+       }
 
-           /* Advance to the next WAL file */
-           logsegno++;
+       /*
+        * Send timeline history files too. Only the latest timeline history
+        * file is required for recovery, and even that only if there happens
+        * to be a timeline switch in the first WAL segment that contains the
+        * checkpoint record, or if we're taking a base backup from a standby
+        * server and the target timeline changes while the backup is taken. 
+        * But they are small and highly useful for debugging purposes, so
+        * better include them all, always.
+        */
+       foreach(lc, historyFileList)
+       {
+           char *fname = lfirst(lc);
+           snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
 
-           /* Have we reached our stop position yet? */
-           if (logsegno > endlogsegno)
-               break;
+           if (lstat(pathbuf, &statbuf) != 0)
+               ereport(ERROR,
+                       (errcode_for_file_access(),
+                        errmsg("could not stat file \"%s\": %m", pathbuf)));
+
+           sendFile(pathbuf, pathbuf, &statbuf, false);
        }
 
        /* Send CopyDone message for the last tar file */
@@ -293,6 +429,19 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
    SendXlogRecPtrResult(endptr);
 }
 
+/*
+ * qsort comparison function, to compare log/seg portion of WAL segment
+ * filenames, ignoring the timeline portion.
+ */
+static int
+compareWalFileNames(const void *a, const void *b)
+{
+   char *fna = *((char **) a);
+   char *fnb = *((char **) b);
+
+   return strcmp(fna + 8, fnb + 8);
+}
+
 /*
  * Parse the base backup options passed down by the parser
  */
index 0e0982fc88d95b9bc09ce9ad906c9826157f0c2a..5408b142f8d9b7d0522327c9daafc36090ba4348 100644 (file)
@@ -1170,7 +1170,6 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
    char       *p;
    XLogRecPtr  recptr;
    Size        nbytes;
-   XLogSegNo   lastRemovedSegNo;
    XLogSegNo   segno;
 
 retry:
@@ -1263,13 +1262,8 @@ retry:
     * read() succeeds in that case, but the data we tried to read might
     * already have been overwritten with new WAL records.
     */
-   XLogGetLastRemoved(&lastRemovedSegNo);
    XLByteToSeg(startptr, segno);
-   if (segno <= lastRemovedSegNo)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("requested WAL segment %s has already been removed",
-                       XLogFileNameP(sendTimeLine, segno))));
+   CheckXLogRemoved(segno, ThisTimeLineID);
 
    /*
     * During recovery, the currently-open WAL file might be replaced with the
index 1ce922d89d92040faf30414bad07402359f55d89..885b5fc0ad74396321762d53e255f0faec0897fd 100644 (file)
@@ -267,7 +267,7 @@ extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
 extern int XLogFileInit(XLogSegNo segno, bool *use_existent, bool use_lock);
 extern int XLogFileOpen(XLogSegNo segno);
 
-extern void XLogGetLastRemoved(XLogSegNo *segno);
+extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
 extern void XLogSetAsyncXactLSN(XLogRecPtr record);
 
 extern Buffer RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record,