Split off functions related to timeline history files and XLOG archiving.
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 2 Oct 2012 10:37:19 +0000 (13:37 +0300)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Tue, 2 Oct 2012 10:37:19 +0000 (13:37 +0300)
This is just refactoring, to make the functions accessible outside xlog.c.
A followup patch will make use of that, to allow fetching timeline history
files over streaming replication.

src/backend/access/transam/Makefile
src/backend/access/transam/timeline.c [new file with mode: 0644]
src/backend/access/transam/xlog.c
src/backend/access/transam/xlogarchive.c [new file with mode: 0644]
src/include/access/timeline.h [new file with mode: 0644]
src/include/access/xlog_internal.h

index f82f10e989ef645dd58988c6d7c2c941e4e9e1df..700cfd85c08e7b78255527bb7e9a91d12fc91166 100644 (file)
@@ -13,7 +13,8 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = clog.o transam.o varsup.o xact.o rmgr.o slru.o subtrans.o multixact.o \
-       twophase.o twophase_rmgr.o xlog.o xlogfuncs.o xlogutils.o
+       timeline.o twophase.o twophase_rmgr.o xlog.o xlogarchive.o xlogfuncs.o \
+       xlogutils.o
 
 include $(top_srcdir)/src/backend/common.mk
 
diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
new file mode 100644 (file)
index 0000000..89d31c5
--- /dev/null
@@ -0,0 +1,378 @@
+/*-------------------------------------------------------------------------
+ *
+ * timeline.c
+ *             Functions for reading and writing timeline history files.
+ *
+ * A timeline history file lists, in a simple text format, the timeline switches
+ * that led up to a timeline. These files are archived along with WAL segments.
+ *
+ * The files are named like "<TLI>.history", where <TLI> is the new timeline's
+ * ID in hexadecimal. For example, if the database starts up and switches to
+ * timeline 5 while processing WAL segment 000000030000002A00000006 (the old
+ * timeline was 3), the history file would be called "00000005.history".
+ *
+ * Each line in the file represents a timeline switch:
+ *
+ * <parentTLI> <xlogfname> <reason>
+ *
+ *     parentTLI       ID of the parent timeline
+ *     xlogfname       filename of the WAL segment where the switch happened
+ *     reason          human-readable explanation of why the timeline was changed
+ *
+ * The fields are separated by tabs. Lines beginning with # are comments, and
+ * are ignored. Empty lines are also ignored.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/timeline.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "access/timeline.h"
+#include "access/xlog_internal.h"
+#include "access/xlogdefs.h"
+#include "storage/fd.h"
+
+/*
+ * Try to read a timeline's history file.
+ *
+ * If successful, return the list of component TLIs (the given TLI followed by
+ * its ancestor TLIs). If we can't find the history file, assume that the
+ * timeline has no parents, and return a list of just the specified timeline
+ * ID.
+ */
+List *
+readTimeLineHistory(TimeLineID targetTLI)
+{
+       List       *result;
+       char            path[MAXPGPATH];
+       char            histfname[MAXFNAMELEN];
+       char            fline[MAXPGPATH];
+       FILE       *fd;
+
+       /* Timeline 1 does not have a history file, so no need to check */
+       if (targetTLI == 1)
+               return list_make1_int((int) targetTLI);
+
+       if (InArchiveRecovery)
+       {
+               TLHistoryFileName(histfname, targetTLI);
+               RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+       }
+       else
+               TLHistoryFilePath(path, targetTLI);
+
+       fd = AllocateFile(path, "r");
+       if (fd == NULL)
+       {
+               if (errno != ENOENT)
+                       ereport(FATAL,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not open file \"%s\": %m", path)));
+               /* Not there, so assume no parents */
+               return list_make1_int((int) targetTLI);
+       }
+
+       result = NIL;
+
+       /*
+        * Parse the file...
+        */
+       while (fgets(fline, sizeof(fline), fd) != NULL)
+       {
+               /* skip leading whitespace and check for # comment */
+               char       *ptr;
+               char       *endptr;
+               TimeLineID      tli;
+
+               for (ptr = fline; *ptr; ptr++)
+               {
+                       if (!isspace((unsigned char) *ptr))
+                               break;
+               }
+               if (*ptr == '\0' || *ptr == '#')
+                       continue;
+
+               /* expect a numeric timeline ID as first field of line */
+               tli = (TimeLineID) strtoul(ptr, &endptr, 0);
+               if (endptr == ptr)
+                       ereport(FATAL,
+                                       (errmsg("syntax error in history file: %s", fline),
+                                        errhint("Expected a numeric timeline ID.")));
+
+               if (result &&
+                       tli <= (TimeLineID) linitial_int(result))
+                       ereport(FATAL,
+                                       (errmsg("invalid data in history file: %s", fline),
+                                  errhint("Timeline IDs must be in increasing sequence.")));
+
+               /* Build list with newest item first */
+               result = lcons_int((int) tli, result);
+
+               /* we ignore the remainder of each line */
+       }
+
+       FreeFile(fd);
+
+       if (result &&
+               targetTLI <= (TimeLineID) linitial_int(result))
+               ereport(FATAL,
+                               (errmsg("invalid data in history file \"%s\"", path),
+                       errhint("Timeline IDs must be less than child timeline's ID.")));
+
+       result = lcons_int((int) targetTLI, result);
+
+       ereport(DEBUG3,
+                       (errmsg_internal("history of timeline %u is %s",
+                                                        targetTLI, nodeToString(result))));
+
+       return result;
+}
+
+/*
+ * Probe whether a timeline history file exists for the given timeline ID
+ */
+bool
+existsTimeLineHistory(TimeLineID probeTLI)
+{
+       char            path[MAXPGPATH];
+       char            histfname[MAXFNAMELEN];
+       FILE       *fd;
+
+       /* Timeline 1 does not have a history file, so no need to check */
+       if (probeTLI == 1)
+               return false;
+
+       if (InArchiveRecovery)
+       {
+               TLHistoryFileName(histfname, probeTLI);
+               RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+       }
+       else
+               TLHistoryFilePath(path, probeTLI);
+
+       fd = AllocateFile(path, "r");
+       if (fd != NULL)
+       {
+               FreeFile(fd);
+               return true;
+       }
+       else
+       {
+               if (errno != ENOENT)
+                       ereport(FATAL,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not open file \"%s\": %m", path)));
+               return false;
+       }
+}
+
+/*
+ * Find the newest existing timeline, assuming that startTLI exists.
+ *
+ * Note: while this is somewhat heuristic, it does positively guarantee
+ * that (result + 1) is not a known timeline, and therefore it should
+ * be safe to assign that ID to a new timeline.
+ */
+TimeLineID
+findNewestTimeLine(TimeLineID startTLI)
+{
+       TimeLineID      newestTLI;
+       TimeLineID      probeTLI;
+
+       /*
+        * The algorithm is just to probe for the existence of timeline history
+        * files.  XXX is it useful to allow gaps in the sequence?
+        */
+       newestTLI = startTLI;
+
+       for (probeTLI = startTLI + 1;; probeTLI++)
+       {
+               if (existsTimeLineHistory(probeTLI))
+               {
+                       newestTLI = probeTLI;           /* probeTLI exists */
+               }
+               else
+               {
+                       /* doesn't exist, assume we're done */
+                       break;
+               }
+       }
+
+       return newestTLI;
+}
+
+/*
+ * Create a new timeline history file.
+ *
+ *     newTLI: ID of the new timeline
+ *     parentTLI: ID of its immediate parent
+ *     endTLI, endLogSegNo: identify the last used WAL file, for annotation purposes
+ *     reason: human-readable explanation of why the timeline was switched
+ *
+ * Currently this is only used at the end of recovery, and so there are no
+ * locking considerations.  But we should be just as careful as XLogFileInit to
+ * avoid emplacing a bogus file.
+ */
+void
+writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
+                                        TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason)
+{
+       char            path[MAXPGPATH];
+       char            tmppath[MAXPGPATH];
+       char            histfname[MAXFNAMELEN];
+       char            xlogfname[MAXFNAMELEN];
+       char            buffer[BLCKSZ];
+       int                     srcfd;
+       int                     fd;
+       int                     nbytes;
+
+       Assert(newTLI > parentTLI); /* else bad selection of newTLI */
+
+       /*
+        * Write into a temp file name.
+        */
+       snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
+
+       unlink(tmppath);
+
+       /* do not use get_sync_bit() here --- want to fsync only at end of fill */
+       fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
+                                          S_IRUSR | S_IWUSR);
+       if (fd < 0)
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not create file \"%s\": %m", tmppath)));
+
+       /*
+        * If a history file exists for the parent, copy it verbatim
+        */
+       if (InArchiveRecovery)
+       {
+               TLHistoryFileName(histfname, parentTLI);
+               RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
+       }
+       else
+               TLHistoryFilePath(path, parentTLI);
+
+       srcfd = BasicOpenFile(path, O_RDONLY, 0);
+       if (srcfd < 0)
+       {
+               if (errno != ENOENT)
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not open file \"%s\": %m", path)));
+               /* Not there, so assume parent has no parents */
+       }
+       else
+       {
+               for (;;)
+               {
+                       errno = 0;
+                       nbytes = (int) read(srcfd, buffer, sizeof(buffer));
+                       if (nbytes < 0 || errno != 0)
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not read file \"%s\": %m", path)));
+                       if (nbytes == 0)
+                               break;
+                       errno = 0;
+                       if ((int) write(fd, buffer, nbytes) != nbytes)
+                       {
+                               int                     save_errno = errno;
+
+                               /*
+                                * If we fail to make the file, delete it to release disk
+                                * space
+                                */
+                               unlink(tmppath);
+
+                               /*
+                                * if write didn't set errno, assume problem is no disk space
+                                */
+                               errno = save_errno ? save_errno : ENOSPC;
+
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                        errmsg("could not write to file \"%s\": %m", tmppath)));
+                       }
+               }
+               close(srcfd);
+       }
+
+       /*
+        * Append one line with the details of this timeline split.
+        *
+        * If we did have a parent file, insert an extra newline just in case the
+        * parent file failed to end with one.
+        */
+       XLogFileName(xlogfname, endTLI, endLogSegNo);
+
+       snprintf(buffer, sizeof(buffer),
+                        "%s%u\t%s\t%s\n",
+                        (srcfd < 0) ? "" : "\n",
+                        parentTLI,
+                        xlogfname,
+                        reason);
+
+       nbytes = strlen(buffer);
+       errno = 0;
+       if ((int) write(fd, buffer, nbytes) != nbytes)
+       {
+               int                     save_errno = errno;
+
+               /*
+                * If we fail to make the file, delete it to release disk space
+                */
+               unlink(tmppath);
+               /* if write didn't set errno, assume problem is no disk space */
+               errno = save_errno ? save_errno : ENOSPC;
+
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not write to file \"%s\": %m", tmppath)));
+       }
+
+       if (pg_fsync(fd) != 0)
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not fsync file \"%s\": %m", tmppath)));
+
+       if (close(fd))
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not close file \"%s\": %m", tmppath)));
+
+
+       /*
+        * Now move the completed history file into place with its final name.
+        */
+       TLHistoryFilePath(path, newTLI);
+
+       /*
+        * Prefer link() to rename() here just to be really sure that we don't
+        * overwrite an existing history file.  However, there shouldn't be one, so
+        * rename() is an acceptable substitute except for the truly paranoid.
+        */
+#if HAVE_WORKING_LINK
+       if (link(tmppath, path) < 0)
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not link file \"%s\" to \"%s\": %m",
+                                               tmppath, path)));
+       unlink(tmppath);
+#else
+       if (rename(tmppath, path) < 0)
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not rename file \"%s\" to \"%s\": %m",
+                                               tmppath, path)));
+#endif
+}
index ff56c26ab4c0edd26bf0a494715efabfa86976b2..b3519b04b33937670925725005168facac59234e 100644 (file)
@@ -26,6 +26,7 @@
 #include "access/clog.h"
 #include "access/multixact.h"
 #include "access/subtrans.h"
+#include "access/timeline.h"
 #include "access/transam.h"
 #include "access/tuptoaster.h"
 #include "access/twophase.h"
@@ -190,14 +191,14 @@ static bool LocalHotStandbyActive = false;
  */
 static int     LocalXLogInsertAllowed = -1;
 
-/* Are we recovering using offline XLOG archives? */
-static bool InArchiveRecovery = false;
+/* Are we recovering using offline XLOG archives? (only valid in the startup process) */
+bool InArchiveRecovery = false;
 
 /* Was the last xlog file restored from archive, or local? */
 static bool restoredFromArchive = false;
 
 /* options taken from recovery.conf for archive recovery */
-static char *recoveryRestoreCommand = NULL;
+char *recoveryRestoreCommand = NULL;
 static char *recoveryEndCommand = NULL;
 static char *archiveCleanupCommand = NULL;
 static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
@@ -208,7 +209,7 @@ static TimestampTz recoveryTargetTime;
 static char *recoveryTargetName;
 
 /* options taken from recovery.conf for XLOG streaming */
-static bool StandbyMode = false;
+bool StandbyMode = false;
 static char *PrimaryConnInfo = NULL;
 static char *TriggerFile = NULL;
 
@@ -604,11 +605,6 @@ typedef struct xl_restore_point
 } xl_restore_point;
 
 
-static void XLogArchiveNotify(const char *xlog);
-static void XLogArchiveNotifySeg(XLogSegNo segno);
-static bool XLogArchiveCheckDone(const char *xlog);
-static bool XLogArchiveIsBusy(const char *xlog);
-static void XLogArchiveCleanup(const char *xlog);
 static void readRecoveryCommandFile(void);
 static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
 static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
@@ -636,10 +632,6 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
                         bool randAccess);
 static int     emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
 static void XLogFileClose(void);
-static bool RestoreArchivedFile(char *path, const char *xlogfname,
-                                       const char *recovername, off_t expectedSize);
-static void ExecuteRecoveryCommand(char *command, char *commandName,
-                                          bool failOnerror);
 static void PreallocXlogFiles(XLogRecPtr endptr);
 static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr);
 static void UpdateLastRemovedPtr(char *filename);
@@ -652,12 +644,7 @@ static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode);
 static bool ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record,
                                          int emode, bool randAccess);
 static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
-static List *readTimeLineHistory(TimeLineID targetTLI);
-static bool existsTimeLineHistory(TimeLineID probeTLI);
 static bool rescanLatestTimeLine(void);
-static TimeLineID findNewestTimeLine(TimeLineID startTLI);
-static void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
-                                        TimeLineID endTLI, XLogSegNo endLogSegNo);
 static void WriteControlFile(void);
 static void ReadControlFile(void);
 static char *str_time(pg_time_t tnow);
@@ -1278,168 +1265,6 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites,
        return false;                           /* buffer does not need to be backed up */
 }
 
-/*
- * XLogArchiveNotify
- *
- * Create an archive notification file
- *
- * The name of the notification file is the message that will be picked up
- * by the archiver, e.g. we write 0000000100000001000000C6.ready
- * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6,
- * then when complete, rename it to 0000000100000001000000C6.done
- */
-static void
-XLogArchiveNotify(const char *xlog)
-{
-       char            archiveStatusPath[MAXPGPATH];
-       FILE       *fd;
-
-       /* insert an otherwise empty file called <XLOG>.ready */
-       StatusFilePath(archiveStatusPath, xlog, ".ready");
-       fd = AllocateFile(archiveStatusPath, "w");
-       if (fd == NULL)
-       {
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not create archive status file \"%s\": %m",
-                                               archiveStatusPath)));
-               return;
-       }
-       if (FreeFile(fd))
-       {
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not write archive status file \"%s\": %m",
-                                               archiveStatusPath)));
-               return;
-       }
-
-       /* Notify archiver that it's got something to do */
-       if (IsUnderPostmaster)
-               SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
-}
-
-/*
- * Convenience routine to notify using segment number representation of filename
- */
-static void
-XLogArchiveNotifySeg(XLogSegNo segno)
-{
-       char            xlog[MAXFNAMELEN];
-
-       XLogFileName(xlog, ThisTimeLineID, segno);
-       XLogArchiveNotify(xlog);
-}
-
-/*
- * XLogArchiveCheckDone
- *
- * This is called when we are ready to delete or recycle an old XLOG segment
- * file or backup history file.  If it is okay to delete it then return true.
- * If it is not time to delete it, make sure a .ready file exists, and return
- * false.
- *
- * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists,
- * then return false; else create <XLOG>.ready and return false.
- *
- * The reason we do things this way is so that if the original attempt to
- * create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
- */
-static bool
-XLogArchiveCheckDone(const char *xlog)
-{
-       char            archiveStatusPath[MAXPGPATH];
-       struct stat stat_buf;
-
-       /* Always deletable if archiving is off */
-       if (!XLogArchivingActive())
-               return true;
-
-       /* First check for .done --- this means archiver is done with it */
-       StatusFilePath(archiveStatusPath, xlog, ".done");
-       if (stat(archiveStatusPath, &stat_buf) == 0)
-               return true;
-
-       /* check for .ready --- this means archiver is still busy with it */
-       StatusFilePath(archiveStatusPath, xlog, ".ready");
-       if (stat(archiveStatusPath, &stat_buf) == 0)
-               return false;
-
-       /* Race condition --- maybe archiver just finished, so recheck */
-       StatusFilePath(archiveStatusPath, xlog, ".done");
-       if (stat(archiveStatusPath, &stat_buf) == 0)
-               return true;
-
-       /* Retry creation of the .ready file */
-       XLogArchiveNotify(xlog);
-       return false;
-}
-
-/*
- * XLogArchiveIsBusy
- *
- * Check to see if an XLOG segment file is still unarchived.
- * This is almost but not quite the inverse of XLogArchiveCheckDone: in
- * the first place we aren't chartered to recreate the .ready file, and
- * in the second place we should consider that if the file is already gone
- * then it's not busy.  (This check is needed to handle the race condition
- * that a checkpoint already deleted the no-longer-needed file.)
- */
-static bool
-XLogArchiveIsBusy(const char *xlog)
-{
-       char            archiveStatusPath[MAXPGPATH];
-       struct stat stat_buf;
-
-       /* First check for .done --- this means archiver is done with it */
-       StatusFilePath(archiveStatusPath, xlog, ".done");
-       if (stat(archiveStatusPath, &stat_buf) == 0)
-               return false;
-
-       /* check for .ready --- this means archiver is still busy with it */
-       StatusFilePath(archiveStatusPath, xlog, ".ready");
-       if (stat(archiveStatusPath, &stat_buf) == 0)
-               return true;
-
-       /* Race condition --- maybe archiver just finished, so recheck */
-       StatusFilePath(archiveStatusPath, xlog, ".done");
-       if (stat(archiveStatusPath, &stat_buf) == 0)
-               return false;
-
-       /*
-        * Check to see if the WAL file has been removed by checkpoint, which
-        * implies it has already been archived, and explains why we can't see a
-        * status file for it.
-        */
-       snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog);
-       if (stat(archiveStatusPath, &stat_buf) != 0 &&
-               errno == ENOENT)
-               return false;
-
-       return true;
-}
-
-/*
- * XLogArchiveCleanup
- *
- * Cleanup archive notification file(s) for a particular xlog segment
- */
-static void
-XLogArchiveCleanup(const char *xlog)
-{
-       char            archiveStatusPath[MAXPGPATH];
-
-       /* Remove the .done file */
-       StatusFilePath(archiveStatusPath, xlog, ".done");
-       unlink(archiveStatusPath);
-       /* should we complain about failure? */
-
-       /* Remove the .ready file if present --- normally it shouldn't be */
-       StatusFilePath(archiveStatusPath, xlog, ".ready");
-       unlink(archiveStatusPath);
-       /* should we complain about failure? */
-}
-
 /*
  * Advance the Insert state to the next buffer page, writing out the next
  * buffer if it still contains unwritten data.
@@ -2954,386 +2779,6 @@ XLogFileClose(void)
        openLogFile = -1;
 }
 
-/*
- * Attempt to retrieve the specified file from off-line archival storage.
- * If successful, fill "path" with its complete path (note that this will be
- * a temp file name that doesn't follow the normal naming convention), and
- * return TRUE.
- *
- * If not successful, fill "path" with the name of the normal on-line file
- * (which may or may not actually exist, but we'll try to use it), and return
- * FALSE.
- *
- * For fixed-size files, the caller may pass the expected size as an
- * additional crosscheck on successful recovery.  If the file size is not
- * known, set expectedSize = 0.
- */
-static bool
-RestoreArchivedFile(char *path, const char *xlogfname,
-                                       const char *recovername, off_t expectedSize)
-{
-       char            xlogpath[MAXPGPATH];
-       char            xlogRestoreCmd[MAXPGPATH];
-       char            lastRestartPointFname[MAXPGPATH];
-       char       *dp;
-       char       *endp;
-       const char *sp;
-       int                     rc;
-       bool            signaled;
-       struct stat stat_buf;
-       XLogSegNo       restartSegNo;
-
-       /* In standby mode, restore_command might not be supplied */
-       if (recoveryRestoreCommand == NULL)
-               goto not_available;
-
-       /*
-        * When doing archive recovery, we always prefer an archived log file even
-        * if a file of the same name exists in XLOGDIR.  The reason is that the
-        * file in XLOGDIR could be an old, un-filled or partly-filled version
-        * that was copied and restored as part of backing up $PGDATA.
-        *
-        * We could try to optimize this slightly by checking the local copy
-        * lastchange timestamp against the archived copy, but we have no API to
-        * do this, nor can we guarantee that the lastchange timestamp was
-        * preserved correctly when we copied to archive. Our aim is robustness,
-        * so we elect not to do this.
-        *
-        * If we cannot obtain the log file from the archive, however, we will try
-        * to use the XLOGDIR file if it exists.  This is so that we can make use
-        * of log segments that weren't yet transferred to the archive.
-        *
-        * Notice that we don't actually overwrite any files when we copy back
-        * from archive because the recoveryRestoreCommand may inadvertently
-        * restore inappropriate xlogs, or they may be corrupt, so we may wish to
-        * fallback to the segments remaining in current XLOGDIR later. The
-        * copy-from-archive filename is always the same, ensuring that we don't
-        * run out of disk space on long recoveries.
-        */
-       snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
-
-       /*
-        * Make sure there is no existing file named recovername.
-        */
-       if (stat(xlogpath, &stat_buf) != 0)
-       {
-               if (errno != ENOENT)
-                       ereport(FATAL,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not stat file \"%s\": %m",
-                                                       xlogpath)));
-       }
-       else
-       {
-               if (unlink(xlogpath) != 0)
-                       ereport(FATAL,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not remove file \"%s\": %m",
-                                                       xlogpath)));
-       }
-
-       /*
-        * Calculate the archive file cutoff point for use during log shipping
-        * replication. All files earlier than this point can be deleted from the
-        * archive, though there is no requirement to do so.
-        *
-        * We initialise this with the filename of an InvalidXLogRecPtr, which
-        * will prevent the deletion of any WAL files from the archive because of
-        * the alphabetic sorting property of WAL filenames.
-        *
-        * Once we have successfully located the redo pointer of the checkpoint
-        * from which we start recovery we never request a file prior to the redo
-        * pointer of the last restartpoint. When redo begins we know that we have
-        * successfully located it, so there is no need for additional status
-        * flags to signify the point when we can begin deleting WAL files from
-        * the archive.
-        */
-       if (InRedo)
-       {
-               XLByteToSeg(ControlFile->checkPointCopy.redo, restartSegNo);
-               XLogFileName(lastRestartPointFname,
-                                        ControlFile->checkPointCopy.ThisTimeLineID,
-                                        restartSegNo);
-               /* we shouldn't need anything earlier than last restart point */
-               Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
-       }
-       else
-               XLogFileName(lastRestartPointFname, 0, 0L);
-
-       /*
-        * construct the command to be executed
-        */
-       dp = xlogRestoreCmd;
-       endp = xlogRestoreCmd + MAXPGPATH - 1;
-       *endp = '\0';
-
-       for (sp = recoveryRestoreCommand; *sp; sp++)
-       {
-               if (*sp == '%')
-               {
-                       switch (sp[1])
-                       {
-                               case 'p':
-                                       /* %p: relative path of target file */
-                                       sp++;
-                                       StrNCpy(dp, xlogpath, endp - dp);
-                                       make_native_path(dp);
-                                       dp += strlen(dp);
-                                       break;
-                               case 'f':
-                                       /* %f: filename of desired file */
-                                       sp++;
-                                       StrNCpy(dp, xlogfname, endp - dp);
-                                       dp += strlen(dp);
-                                       break;
-                               case 'r':
-                                       /* %r: filename of last restartpoint */
-                                       sp++;
-                                       StrNCpy(dp, lastRestartPointFname, endp - dp);
-                                       dp += strlen(dp);
-                                       break;
-                               case '%':
-                                       /* convert %% to a single % */
-                                       sp++;
-                                       if (dp < endp)
-                                               *dp++ = *sp;
-                                       break;
-                               default:
-                                       /* otherwise treat the % as not special */
-                                       if (dp < endp)
-                                               *dp++ = *sp;
-                                       break;
-                       }
-               }
-               else
-               {
-                       if (dp < endp)
-                               *dp++ = *sp;
-               }
-       }
-       *dp = '\0';
-
-       ereport(DEBUG3,
-                       (errmsg_internal("executing restore command \"%s\"",
-                                                        xlogRestoreCmd)));
-
-       /*
-        * Check signals before restore command and reset afterwards.
-        */
-       PreRestoreCommand();
-
-       /*
-        * Copy xlog from archival storage to XLOGDIR
-        */
-       rc = system(xlogRestoreCmd);
-
-       PostRestoreCommand();
-
-       if (rc == 0)
-       {
-               /*
-                * command apparently succeeded, but let's make sure the file is
-                * really there now and has the correct size.
-                */
-               if (stat(xlogpath, &stat_buf) == 0)
-               {
-                       if (expectedSize > 0 && stat_buf.st_size != expectedSize)
-                       {
-                               int                     elevel;
-
-                               /*
-                                * If we find a partial file in standby mode, we assume it's
-                                * because it's just being copied to the archive, and keep
-                                * trying.
-                                *
-                                * Otherwise treat a wrong-sized file as FATAL to ensure the
-                                * DBA would notice it, but is that too strong? We could try
-                                * to plow ahead with a local copy of the file ... but the
-                                * problem is that there probably isn't one, and we'd
-                                * incorrectly conclude we've reached the end of WAL and we're
-                                * done recovering ...
-                                */
-                               if (StandbyMode && stat_buf.st_size < expectedSize)
-                                       elevel = DEBUG1;
-                               else
-                                       elevel = FATAL;
-                               ereport(elevel,
-                                               (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
-                                                               xlogfname,
-                                                               (unsigned long) stat_buf.st_size,
-                                                               (unsigned long) expectedSize)));
-                               return false;
-                       }
-                       else
-                       {
-                               ereport(LOG,
-                                               (errmsg("restored log file \"%s\" from archive",
-                                                               xlogfname)));
-                               strcpy(path, xlogpath);
-                               return true;
-                       }
-               }
-               else
-               {
-                       /* stat failed */
-                       if (errno != ENOENT)
-                               ereport(FATAL,
-                                               (errcode_for_file_access(),
-                                                errmsg("could not stat file \"%s\": %m",
-                                                               xlogpath)));
-               }
-       }
-
-       /*
-        * Remember, we rollforward UNTIL the restore fails so failure here is
-        * just part of the process... that makes it difficult to determine
-        * whether the restore failed because there isn't an archive to restore,
-        * or because the administrator has specified the restore program
-        * incorrectly.  We have to assume the former.
-        *
-        * However, if the failure was due to any sort of signal, it's best to
-        * punt and abort recovery.  (If we "return false" here, upper levels will
-        * assume that recovery is complete and start up the database!) It's
-        * essential to abort on child SIGINT and SIGQUIT, because per spec
-        * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
-        * those it's a good bet we should have gotten it too.
-        *
-        * On SIGTERM, assume we have received a fast shutdown request, and exit
-        * cleanly. It's pure chance whether we receive the SIGTERM first, or the
-        * child process. If we receive it first, the signal handler will call
-        * proc_exit, otherwise we do it here. If we or the child process received
-        * SIGTERM for any other reason than a fast shutdown request, postmaster
-        * will perform an immediate shutdown when it sees us exiting
-        * unexpectedly.
-        *
-        * Per the Single Unix Spec, shells report exit status > 128 when a called
-        * command died on a signal.  Also, 126 and 127 are used to report
-        * problems such as an unfindable command; treat those as fatal errors
-        * too.
-        */
-       if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
-               proc_exit(1);
-
-       signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
-
-       ereport(signaled ? FATAL : DEBUG2,
-               (errmsg("could not restore file \"%s\" from archive: return code %d",
-                               xlogfname, rc)));
-
-not_available:
-
-       /*
-        * if an archived file is not available, there might still be a version of
-        * this file in XLOGDIR, so return that as the filename to open.
-        *
-        * In many recovery scenarios we expect this to fail also, but if so that
-        * just means we've reached the end of WAL.
-        */
-       snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
-       return false;
-}
-
-/*
- * Attempt to execute an external shell command during recovery.
- *
- * 'command' is the shell command to be executed, 'commandName' is a
- * human-readable name describing the command emitted in the logs. If
- * 'failOnSignal' is true and the command is killed by a signal, a FATAL
- * error is thrown. Otherwise a WARNING is emitted.
- *
- * This is currently used for recovery_end_command and archive_cleanup_command.
- */
-static void
-ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
-{
-       char            xlogRecoveryCmd[MAXPGPATH];
-       char            lastRestartPointFname[MAXPGPATH];
-       char       *dp;
-       char       *endp;
-       const char *sp;
-       int                     rc;
-       bool            signaled;
-       XLogSegNo       restartSegNo;
-
-       Assert(command && commandName);
-
-       /*
-        * Calculate the archive file cutoff point for use during log shipping
-        * replication. All files earlier than this point can be deleted from the
-        * archive, though there is no requirement to do so.
-        */
-       LWLockAcquire(ControlFileLock, LW_SHARED);
-       XLByteToSeg(ControlFile->checkPointCopy.redo, restartSegNo);
-       XLogFileName(lastRestartPointFname,
-                                ControlFile->checkPointCopy.ThisTimeLineID,
-                                restartSegNo);
-       LWLockRelease(ControlFileLock);
-
-       /*
-        * construct the command to be executed
-        */
-       dp = xlogRecoveryCmd;
-       endp = xlogRecoveryCmd + MAXPGPATH - 1;
-       *endp = '\0';
-
-       for (sp = command; *sp; sp++)
-       {
-               if (*sp == '%')
-               {
-                       switch (sp[1])
-                       {
-                               case 'r':
-                                       /* %r: filename of last restartpoint */
-                                       sp++;
-                                       StrNCpy(dp, lastRestartPointFname, endp - dp);
-                                       dp += strlen(dp);
-                                       break;
-                               case '%':
-                                       /* convert %% to a single % */
-                                       sp++;
-                                       if (dp < endp)
-                                               *dp++ = *sp;
-                                       break;
-                               default:
-                                       /* otherwise treat the % as not special */
-                                       if (dp < endp)
-                                               *dp++ = *sp;
-                                       break;
-                       }
-               }
-               else
-               {
-                       if (dp < endp)
-                               *dp++ = *sp;
-               }
-       }
-       *dp = '\0';
-
-       ereport(DEBUG3,
-                       (errmsg_internal("executing %s \"%s\"", commandName, command)));
-
-       /*
-        * execute the constructed command
-        */
-       rc = system(xlogRecoveryCmd);
-       if (rc != 0)
-       {
-               /*
-                * If the failure was due to any sort of signal, it's best to punt and
-                * abort recovery. See also detailed comments on signals in
-                * RestoreArchivedFile().
-                */
-               signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
-
-               ereport((signaled && failOnSignal) ? FATAL : WARNING,
-               /*------
-                  translator: First %s represents a recovery.conf parameter name like
-                 "recovery_end_command", and the 2nd is the value of that parameter. */
-                               (errmsg("%s \"%s\": return code %d", commandName,
-                                               command, rc)));
-       }
-}
-
 /*
  * Preallocate log files beyond the specified log endpoint.
  *
@@ -4304,140 +3749,6 @@ ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record, int emode,
        return true;
 }
 
-/*
- * Try to read a timeline's history file.
- *
- * If successful, return the list of component TLIs (the given TLI followed by
- * its ancestor TLIs). If we can't find the history file, assume that the
- * timeline has no parents, and return a list of just the specified timeline
- * ID.
- */
-static List *
-readTimeLineHistory(TimeLineID targetTLI)
-{
-       List       *result;
-       char            path[MAXPGPATH];
-       char            histfname[MAXFNAMELEN];
-       char            fline[MAXPGPATH];
-       FILE       *fd;
-
-       /* Timeline 1 does not have a history file, so no need to check */
-       if (targetTLI == 1)
-               return list_make1_int((int) targetTLI);
-
-       if (InArchiveRecovery)
-       {
-               TLHistoryFileName(histfname, targetTLI);
-               RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
-       }
-       else
-               TLHistoryFilePath(path, targetTLI);
-
-       fd = AllocateFile(path, "r");
-       if (fd == NULL)
-       {
-               if (errno != ENOENT)
-                       ereport(FATAL,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open file \"%s\": %m", path)));
-               /* Not there, so assume no parents */
-               return list_make1_int((int) targetTLI);
-       }
-
-       result = NIL;
-
-       /*
-        * Parse the file...
-        */
-       while (fgets(fline, sizeof(fline), fd) != NULL)
-       {
-               /* skip leading whitespace and check for # comment */
-               char       *ptr;
-               char       *endptr;
-               TimeLineID      tli;
-
-               for (ptr = fline; *ptr; ptr++)
-               {
-                       if (!isspace((unsigned char) *ptr))
-                               break;
-               }
-               if (*ptr == '\0' || *ptr == '#')
-                       continue;
-
-               /* expect a numeric timeline ID as first field of line */
-               tli = (TimeLineID) strtoul(ptr, &endptr, 0);
-               if (endptr == ptr)
-                       ereport(FATAL,
-                                       (errmsg("syntax error in history file: %s", fline),
-                                        errhint("Expected a numeric timeline ID.")));
-
-               if (result &&
-                       tli <= (TimeLineID) linitial_int(result))
-                       ereport(FATAL,
-                                       (errmsg("invalid data in history file: %s", fline),
-                                  errhint("Timeline IDs must be in increasing sequence.")));
-
-               /* Build list with newest item first */
-               result = lcons_int((int) tli, result);
-
-               /* we ignore the remainder of each line */
-       }
-
-       FreeFile(fd);
-
-       if (result &&
-               targetTLI <= (TimeLineID) linitial_int(result))
-               ereport(FATAL,
-                               (errmsg("invalid data in history file \"%s\"", path),
-                       errhint("Timeline IDs must be less than child timeline's ID.")));
-
-       result = lcons_int((int) targetTLI, result);
-
-       ereport(DEBUG3,
-                       (errmsg_internal("history of timeline %u is %s",
-                                                        targetTLI, nodeToString(result))));
-
-       return result;
-}
-
-/*
- * Probe whether a timeline history file exists for the given timeline ID
- */
-static bool
-existsTimeLineHistory(TimeLineID probeTLI)
-{
-       char            path[MAXPGPATH];
-       char            histfname[MAXFNAMELEN];
-       FILE       *fd;
-
-       /* Timeline 1 does not have a history file, so no need to check */
-       if (probeTLI == 1)
-               return false;
-
-       if (InArchiveRecovery)
-       {
-               TLHistoryFileName(histfname, probeTLI);
-               RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
-       }
-       else
-               TLHistoryFilePath(path, probeTLI);
-
-       fd = AllocateFile(path, "r");
-       if (fd != NULL)
-       {
-               FreeFile(fd);
-               return true;
-       }
-       else
-       {
-               if (errno != ENOENT)
-                       ereport(FATAL,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open file \"%s\": %m", path)));
-               return false;
-       }
-}
-
 /*
  * Scan for new timelines that might have appeared in the archive since we
  * started recovery.
@@ -4501,239 +3812,6 @@ rescanLatestTimeLine(void)
        return false;
 }
 
-/*
- * Find the newest existing timeline, assuming that startTLI exists.
- *
- * Note: while this is somewhat heuristic, it does positively guarantee
- * that (result + 1) is not a known timeline, and therefore it should
- * be safe to assign that ID to a new timeline.
- */
-static TimeLineID
-findNewestTimeLine(TimeLineID startTLI)
-{
-       TimeLineID      newestTLI;
-       TimeLineID      probeTLI;
-
-       /*
-        * The algorithm is just to probe for the existence of timeline history
-        * files.  XXX is it useful to allow gaps in the sequence?
-        */
-       newestTLI = startTLI;
-
-       for (probeTLI = startTLI + 1;; probeTLI++)
-       {
-               if (existsTimeLineHistory(probeTLI))
-               {
-                       newestTLI = probeTLI;           /* probeTLI exists */
-               }
-               else
-               {
-                       /* doesn't exist, assume we're done */
-                       break;
-               }
-       }
-
-       return newestTLI;
-}
-
-/*
- * Create a new timeline history file.
- *
- *     newTLI: ID of the new timeline
- *     parentTLI: ID of its immediate parent
- *     endTLI et al: ID of the last used WAL file, for annotation purposes
- *
- * Currently this is only used during recovery, and so there are no locking
- * considerations.     But we should be just as tense as XLogFileInit to avoid
- * emplacing a bogus file.
- */
-static void
-writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
-                                        TimeLineID endTLI, XLogSegNo endLogSegNo)
-{
-       char            path[MAXPGPATH];
-       char            tmppath[MAXPGPATH];
-       char            histfname[MAXFNAMELEN];
-       char            xlogfname[MAXFNAMELEN];
-       char            buffer[BLCKSZ];
-       int                     srcfd;
-       int                     fd;
-       int                     nbytes;
-
-       Assert(newTLI > parentTLI); /* else bad selection of newTLI */
-
-       /*
-        * Write into a temp file name.
-        */
-       snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
-
-       unlink(tmppath);
-
-       /* do not use get_sync_bit() here --- want to fsync only at end of fill */
-       fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
-                                          S_IRUSR | S_IWUSR);
-       if (fd < 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not create file \"%s\": %m", tmppath)));
-
-       /*
-        * If a history file exists for the parent, copy it verbatim
-        */
-       if (InArchiveRecovery)
-       {
-               TLHistoryFileName(histfname, parentTLI);
-               RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
-       }
-       else
-               TLHistoryFilePath(path, parentTLI);
-
-       srcfd = BasicOpenFile(path, O_RDONLY, 0);
-       if (srcfd < 0)
-       {
-               if (errno != ENOENT)
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open file \"%s\": %m", path)));
-               /* Not there, so assume parent has no parents */
-       }
-       else
-       {
-               for (;;)
-               {
-                       errno = 0;
-                       nbytes = (int) read(srcfd, buffer, sizeof(buffer));
-                       if (nbytes < 0 || errno != 0)
-                               ereport(ERROR,
-                                               (errcode_for_file_access(),
-                                                errmsg("could not read file \"%s\": %m", path)));
-                       if (nbytes == 0)
-                               break;
-                       errno = 0;
-                       if ((int) write(fd, buffer, nbytes) != nbytes)
-                       {
-                               int                     save_errno = errno;
-
-                               /*
-                                * If we fail to make the file, delete it to release disk
-                                * space
-                                */
-                               unlink(tmppath);
-
-                               /*
-                                * if write didn't set errno, assume problem is no disk space
-                                */
-                               errno = save_errno ? save_errno : ENOSPC;
-
-                               ereport(ERROR,
-                                               (errcode_for_file_access(),
-                                        errmsg("could not write to file \"%s\": %m", tmppath)));
-                       }
-               }
-               close(srcfd);
-       }
-
-       /*
-        * Append one line with the details of this timeline split.
-        *
-        * If we did have a parent file, insert an extra newline just in case the
-        * parent file failed to end with one.
-        */
-       XLogFileName(xlogfname, endTLI, endLogSegNo);
-
-       /*
-        * Write comment to history file to explain why and where timeline
-        * changed. Comment varies according to the recovery target used.
-        */
-       if (recoveryTarget == RECOVERY_TARGET_XID)
-               snprintf(buffer, sizeof(buffer),
-                                "%s%u\t%s\t%s transaction %u\n",
-                                (srcfd < 0) ? "" : "\n",
-                                parentTLI,
-                                xlogfname,
-                                recoveryStopAfter ? "after" : "before",
-                                recoveryStopXid);
-       else if (recoveryTarget == RECOVERY_TARGET_TIME)
-               snprintf(buffer, sizeof(buffer),
-                                "%s%u\t%s\t%s %s\n",
-                                (srcfd < 0) ? "" : "\n",
-                                parentTLI,
-                                xlogfname,
-                                recoveryStopAfter ? "after" : "before",
-                                timestamptz_to_str(recoveryStopTime));
-       else if (recoveryTarget == RECOVERY_TARGET_NAME)
-               snprintf(buffer, sizeof(buffer),
-                                "%s%u\t%s\tat restore point \"%s\"\n",
-                                (srcfd < 0) ? "" : "\n",
-                                parentTLI,
-                                xlogfname,
-                                recoveryStopName);
-       else
-               snprintf(buffer, sizeof(buffer),
-                                "%s%u\t%s\tno recovery target specified\n",
-                                (srcfd < 0) ? "" : "\n",
-                                parentTLI,
-                                xlogfname);
-
-       nbytes = strlen(buffer);
-       errno = 0;
-       if ((int) write(fd, buffer, nbytes) != nbytes)
-       {
-               int                     save_errno = errno;
-
-               /*
-                * If we fail to make the file, delete it to release disk space
-                */
-               unlink(tmppath);
-               /* if write didn't set errno, assume problem is no disk space */
-               errno = save_errno ? save_errno : ENOSPC;
-
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not write to file \"%s\": %m", tmppath)));
-       }
-
-       if (pg_fsync(fd) != 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not fsync file \"%s\": %m", tmppath)));
-
-       if (close(fd))
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not close file \"%s\": %m", tmppath)));
-
-
-       /*
-        * Now move the completed history file into place with its final name.
-        */
-       TLHistoryFilePath(path, newTLI);
-
-       /*
-        * Prefer link() to rename() here just to be really sure that we don't
-        * overwrite an existing logfile.  However, there shouldn't be one, so
-        * rename() is an acceptable substitute except for the truly paranoid.
-        */
-#if HAVE_WORKING_LINK
-       if (link(tmppath, path) < 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not link file \"%s\" to \"%s\": %m",
-                                               tmppath, path)));
-       unlink(tmppath);
-#else
-       if (rename(tmppath, path) < 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                               tmppath, path)));
-#endif
-
-       /* The history file can be archived immediately. */
-       TLHistoryFileName(histfname, newTLI);
-       XLogArchiveNotify(histfname);
-}
-
 /*
  * I/O routines for pg_control
  *
@@ -6869,11 +5947,35 @@ StartupXLOG(void)
         */
        if (InArchiveRecovery)
        {
+               char    reason[200];
+
                ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1;
                ereport(LOG,
                                (errmsg("selected new timeline ID: %u", ThisTimeLineID)));
+
+               /*
+                * Compose the reason passed to writeTimeLineHistory(): why and where
+                * the timeline changed, per recovery target. No trailing newline.
+                */
+               if (recoveryTarget == RECOVERY_TARGET_XID)
+                       snprintf(reason, sizeof(reason),
+                                        "%s transaction %u",
+                                        recoveryStopAfter ? "after" : "before",
+                                        recoveryStopXid);
+               else if (recoveryTarget == RECOVERY_TARGET_TIME)
+                       snprintf(reason, sizeof(reason),
+                                        "%s %s",
+                                        recoveryStopAfter ? "after" : "before",
+                                        timestamptz_to_str(recoveryStopTime));
+               else if (recoveryTarget == RECOVERY_TARGET_NAME)
+                       snprintf(reason, sizeof(reason),
+                                        "at restore point \"%s\"",
+                                        recoveryStopName);
+               else
+                       snprintf(reason, sizeof(reason), "no recovery target specified");
+
                writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
-                                                        curFileTLI, endLogSegNo);
+                                                        curFileTLI, endLogSegNo, reason);
        }
 
        /* Save the selected TimeLineID in shared memory, too */
@@ -9918,6 +9020,36 @@ GetXLogWriteRecPtr(void)
        return LogwrtResult.Write;
 }
 
+/*
+ * GetOldestRestartPoint
+ *
+ * Report the redo pointer and timeline ID of the last restartpoint, as
+ * stored in the control file's checkpoint copy, through *oldrecptr and
+ * *oldtli.  That is the oldest WAL position still needed if recovery has
+ * to be restarted.
+ *
+ * While InRedo is false the outputs are set to InvalidXLogRecPtr and 0
+ * instead, so that a caller never treats a not-yet-reliable position as a
+ * safe cutoff for deleting WAL files.  Only the startup process during WAL
+ * redo gets a valid answer; that is sufficient for the current usage.
+ */
+void
+GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
+{
+       /* Not in WAL redo: no reliable answer, so report invalid values. */
+       if (!InRedo)
+       {
+               *oldrecptr = InvalidXLogRecPtr;
+               *oldtli = 0;
+               return;
+       }
+
+       LWLockAcquire(ControlFileLock, LW_SHARED);
+       *oldrecptr = ControlFile->checkPointCopy.redo;
+       *oldtli = ControlFile->checkPointCopy.ThisTimeLineID;
+       LWLockRelease(ControlFileLock);
+}
+
 /*
  * read_backup_label: check to see if a backup_label file is present
  *
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
new file mode 100644 (file)
index 0000000..e727957
--- /dev/null
@@ -0,0 +1,572 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogarchive.c
+ *             Functions for archiving WAL files and restoring from the archive.
+ *
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/xlogarchive.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "access/xlog_internal.h"
+#include "miscadmin.h"
+#include "postmaster/startup.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/pmsignal.h"
+
+/*
+ * Attempt to retrieve the specified file from off-line archival storage.
+ * If successful, fill "path" with its complete path (note that this will be
+ * a temp file name that doesn't follow the normal naming convention), and
+ * return TRUE.
+ *
+ * If not successful, fill "path" with the name of the normal on-line file
+ * (which may or may not actually exist, but we'll try to use it), and return
+ * FALSE.
+ *
+ * For fixed-size files, the caller may pass the expected size as an
+ * additional crosscheck on successful recovery.  If the file size is not
+ * known, set expectedSize = 0.
+ *
+ * Parameters:
+ *     path            - output buffer; written with snprintf(..., MAXPGPATH, ...)
+ *                               or strcpy() of a MAXPGPATH-sized buffer, so it must
+ *                               have room for MAXPGPATH bytes
+ *     xlogfname       - name of the file to fetch, substituted for %f in
+ *                               restore_command
+ *     recovername - file name under XLOGDIR that restore_command is asked to
+ *                               create (substituted, with directory, for %p); any
+ *                               existing file of that name is removed first
+ *     expectedSize - required size of the restored file, or 0 to skip the
+ *                               size check
+ *
+ * Note that when the restored file fails the size check and the error is
+ * reported at a non-FATAL level, FALSE is returned without "path" having
+ * been filled in.
+ */
+bool
+RestoreArchivedFile(char *path, const char *xlogfname,
+                                       const char *recovername, off_t expectedSize)
+{
+       char            xlogpath[MAXPGPATH];
+       char            xlogRestoreCmd[MAXPGPATH];
+       char            lastRestartPointFname[MAXPGPATH];
+       char       *dp;
+       char       *endp;
+       const char *sp;
+       int                     rc;
+       bool            signaled;
+       struct stat stat_buf;
+       XLogSegNo       restartSegNo;
+       XLogRecPtr      restartRedoPtr;
+       TimeLineID      restartTli;
+
+       /* In standby mode, restore_command might not be supplied */
+       if (recoveryRestoreCommand == NULL)
+               goto not_available;
+
+       /*
+        * When doing archive recovery, we always prefer an archived log file even
+        * if a file of the same name exists in XLOGDIR.  The reason is that the
+        * file in XLOGDIR could be an old, un-filled or partly-filled version
+        * that was copied and restored as part of backing up $PGDATA.
+        *
+        * We could try to optimize this slightly by checking the local copy
+        * lastchange timestamp against the archived copy, but we have no API to
+        * do this, nor can we guarantee that the lastchange timestamp was
+        * preserved correctly when we copied to archive. Our aim is robustness,
+        * so we elect not to do this.
+        *
+        * If we cannot obtain the log file from the archive, however, we will try
+        * to use the XLOGDIR file if it exists.  This is so that we can make use
+        * of log segments that weren't yet transferred to the archive.
+        *
+        * Notice that we don't actually overwrite any files when we copy back
+        * from archive because the restore_command may inadvertently
+        * restore inappropriate xlogs, or they may be corrupt, so we may wish to
+        * fallback to the segments remaining in current XLOGDIR later. The
+        * copy-from-archive filename is always the same, ensuring that we don't
+        * run out of disk space on long recoveries.
+        */
+       snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
+
+       /*
+        * Make sure there is no existing file named recovername.  A missing
+        * file (ENOENT) is the expected case; any other stat() failure, or a
+        * failure to remove an existing file, is FATAL.
+        */
+       if (stat(xlogpath, &stat_buf) != 0)
+       {
+               if (errno != ENOENT)
+                       ereport(FATAL,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not stat file \"%s\": %m",
+                                                       xlogpath)));
+       }
+       else
+       {
+               if (unlink(xlogpath) != 0)
+                       ereport(FATAL,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not remove file \"%s\": %m",
+                                                       xlogpath)));
+       }
+
+       /*
+        * Calculate the archive file cutoff point for use during log shipping
+        * replication. All files earlier than this point can be deleted from the
+        * archive, though there is no requirement to do so.
+        *
+        * We initialise this with the filename of an InvalidXLogRecPtr, which
+        * will prevent the deletion of any WAL files from the archive because of
+        * the alphabetic sorting property of WAL filenames.
+        *
+        * Once we have successfully located the redo pointer of the checkpoint
+        * from which we start recovery we never request a file prior to the redo
+        * pointer of the last restartpoint. When redo begins we know that we have
+        * successfully located it, so there is no need for additional status
+        * flags to signify the point when we can begin deleting WAL files from
+        * the archive.
+        */
+       GetOldestRestartPoint(&restartRedoPtr, &restartTli);
+       if (!XLogRecPtrIsInvalid(restartRedoPtr))
+       {
+               XLByteToSeg(restartRedoPtr, restartSegNo);
+               XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
+               /* we shouldn't need anything earlier than last restart point */
+               Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
+       }
+       else
+               XLogFileName(lastRestartPointFname, 0, 0L);
+
+       /*
+        * construct the command to be executed
+        *
+        * "endp" points at the last byte of the buffer, which is reserved for
+        * the terminating NUL; every copy below is bounded by it, so anything
+        * that does not fit is dropped rather than overrunning the buffer.
+        */
+       dp = xlogRestoreCmd;
+       endp = xlogRestoreCmd + MAXPGPATH - 1;
+       *endp = '\0';
+
+       for (sp = recoveryRestoreCommand; *sp; sp++)
+       {
+               if (*sp == '%')
+               {
+                       switch (sp[1])
+                       {
+                               case 'p':
+                                       /* %p: relative path of target file */
+                                       sp++;
+                                       StrNCpy(dp, xlogpath, endp - dp);
+                                       make_native_path(dp);
+                                       dp += strlen(dp);
+                                       break;
+                               case 'f':
+                                       /* %f: filename of desired file */
+                                       sp++;
+                                       StrNCpy(dp, xlogfname, endp - dp);
+                                       dp += strlen(dp);
+                                       break;
+                               case 'r':
+                                       /* %r: filename of last restartpoint */
+                                       sp++;
+                                       StrNCpy(dp, lastRestartPointFname, endp - dp);
+                                       dp += strlen(dp);
+                                       break;
+                               case '%':
+                                       /* convert %% to a single % */
+                                       sp++;
+                                       if (dp < endp)
+                                               *dp++ = *sp;
+                                       break;
+                               default:
+                                       /* otherwise treat the % as not special */
+                                       if (dp < endp)
+                                               *dp++ = *sp;
+                                       break;
+                       }
+               }
+               else
+               {
+                       if (dp < endp)
+                               *dp++ = *sp;
+               }
+       }
+       *dp = '\0';
+
+       ereport(DEBUG3,
+                       (errmsg_internal("executing restore command \"%s\"",
+                                                        xlogRestoreCmd)));
+
+       /*
+        * Check signals before restore command and reset afterwards.
+        */
+       PreRestoreCommand();
+
+       /*
+        * Copy xlog from archival storage to XLOGDIR
+        */
+       rc = system(xlogRestoreCmd);
+
+       PostRestoreCommand();
+
+       if (rc == 0)
+       {
+               /*
+                * command apparently succeeded, but let's make sure the file is
+                * really there now and has the correct size.
+                */
+               if (stat(xlogpath, &stat_buf) == 0)
+               {
+                       if (expectedSize > 0 && stat_buf.st_size != expectedSize)
+                       {
+                               int                     elevel;
+
+                               /*
+                                * If we find a partial file in standby mode, we assume it's
+                                * because it's just being copied to the archive, and keep
+                                * trying.
+                                *
+                                * Otherwise treat a wrong-sized file as FATAL to ensure the
+                                * DBA would notice it, but is that too strong? We could try
+                                * to plow ahead with a local copy of the file ... but the
+                                * problem is that there probably isn't one, and we'd
+                                * incorrectly conclude we've reached the end of WAL and we're
+                                * done recovering ...
+                                */
+                               if (StandbyMode && stat_buf.st_size < expectedSize)
+                                       elevel = DEBUG1;
+                               else
+                                       elevel = FATAL;
+                               ereport(elevel,
+                                               (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
+                                                               xlogfname,
+                                                               (unsigned long) stat_buf.st_size,
+                                                               (unsigned long) expectedSize)));
+                               /*
+                                * Presumably only reached in the DEBUG1 case (a FATAL
+                                * report is not expected to return).  Note that "path"
+                                * is left untouched on this exit.
+                                */
+                               return false;
+                       }
+                       else
+                       {
+                               ereport(LOG,
+                                               (errmsg("restored log file \"%s\" from archive",
+                                                               xlogfname)));
+                               strcpy(path, xlogpath);
+                               return true;
+                       }
+               }
+               else
+               {
+                       /*
+                        * stat failed.  If the file simply isn't there (ENOENT) despite
+                        * the zero exit status, fall through and handle it like a
+                        * failed restore below.
+                        */
+                       if (errno != ENOENT)
+                               ereport(FATAL,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not stat file \"%s\": %m",
+                                                               xlogpath)));
+               }
+       }
+
+       /*
+        * Remember, we rollforward UNTIL the restore fails so failure here is
+        * just part of the process... that makes it difficult to determine
+        * whether the restore failed because there isn't an archive to restore,
+        * or because the administrator has specified the restore program
+        * incorrectly.  We have to assume the former.
+        *
+        * However, if the failure was due to any sort of signal, it's best to
+        * punt and abort recovery.  (If we "return false" here, upper levels will
+        * assume that recovery is complete and start up the database!) It's
+        * essential to abort on child SIGINT and SIGQUIT, because per spec
+        * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
+        * those it's a good bet we should have gotten it too.
+        *
+        * On SIGTERM, assume we have received a fast shutdown request, and exit
+        * cleanly. It's pure chance whether we receive the SIGTERM first, or the
+        * child process. If we receive it first, the signal handler will call
+        * proc_exit, otherwise we do it here. If we or the child process received
+        * SIGTERM for any other reason than a fast shutdown request, postmaster
+        * will perform an immediate shutdown when it sees us exiting
+        * unexpectedly.
+        *
+        * Per the Single Unix Spec, shells report exit status > 128 when a called
+        * command died on a signal.  Also, 126 and 127 are used to report
+        * problems such as an unfindable command; treat those as fatal errors
+        * too.
+        */
+       if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM)
+               proc_exit(1);
+
+       signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
+
+       ereport(signaled ? FATAL : DEBUG2,
+               (errmsg("could not restore file \"%s\" from archive: return code %d",
+                               xlogfname, rc)));
+
+       /* Also reached directly when no restore_command has been configured */
+not_available:
+
+       /*
+        * if an archived file is not available, there might still be a version of
+        * this file in XLOGDIR, so return that as the filename to open.
+        *
+        * In many recovery scenarios we expect this to fail also, but if so that
+        * just means we've reached the end of WAL.
+        */
+       snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
+       return false;
+}
+
+/*
+ * Attempt to execute an external shell command during recovery.
+ *
+ * 'command' is the shell command template to be executed: "%r" in it is
+ * replaced by the WAL file name of the last restartpoint, and "%%" by a
+ * single "%".  'commandName' is a human-readable name describing the
+ * command, emitted in the logs.
+ *
+ * Nothing is reported if the command exits with status zero.  If it fails
+ * and 'failOnSignal' is true and the command was killed by a signal (or the
+ * shell reported an exit status above 125), a FATAL error is thrown.  Any
+ * other failure is reported as a WARNING.
+ *
+ * This is currently used for recovery_end_command and archive_cleanup_command.
+ */
+void
+ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
+{
+       char            xlogRecoveryCmd[MAXPGPATH];
+       char            lastRestartPointFname[MAXPGPATH];
+       char       *dp;
+       char       *endp;
+       const char *sp;
+       int                     rc;
+       bool            signaled;
+       XLogSegNo       restartSegNo;
+       XLogRecPtr      restartRedoPtr;
+       TimeLineID      restartTli;
+
+       Assert(command && commandName);
+
+       /*
+        * Calculate the archive file cutoff point for use during log shipping
+        * replication. All files earlier than this point can be deleted from the
+        * archive, though there is no requirement to do so.
+        *
+        * GetOldestRestartPoint() does its own ControlFileLock handling (and
+        * takes no lock at all outside of redo), so the lock must not be
+        * released here: we never acquired it.
+        */
+       GetOldestRestartPoint(&restartRedoPtr, &restartTli);
+       XLByteToSeg(restartRedoPtr, restartSegNo);
+       XLogFileName(lastRestartPointFname, restartTli, restartSegNo);
+
+       /*
+        * construct the command to be executed
+        *
+        * "endp" points at the last byte of the buffer, reserved for the
+        * terminating NUL; every copy below is bounded by it.
+        */
+       dp = xlogRecoveryCmd;
+       endp = xlogRecoveryCmd + MAXPGPATH - 1;
+       *endp = '\0';
+
+       for (sp = command; *sp; sp++)
+       {
+               if (*sp == '%')
+               {
+                       switch (sp[1])
+                       {
+                               case 'r':
+                                       /* %r: filename of last restartpoint */
+                                       sp++;
+                                       StrNCpy(dp, lastRestartPointFname, endp - dp);
+                                       dp += strlen(dp);
+                                       break;
+                               case '%':
+                                       /* convert %% to a single % */
+                                       sp++;
+                                       if (dp < endp)
+                                               *dp++ = *sp;
+                                       break;
+                               default:
+                                       /* otherwise treat the % as not special */
+                                       if (dp < endp)
+                                               *dp++ = *sp;
+                                       break;
+                       }
+               }
+               else
+               {
+                       if (dp < endp)
+                               *dp++ = *sp;
+               }
+       }
+       *dp = '\0';
+
+       /* Note: this logs the command as configured, before substitution */
+       ereport(DEBUG3,
+                       (errmsg_internal("executing %s \"%s\"", commandName, command)));
+
+       /*
+        * execute the constructed command
+        */
+       rc = system(xlogRecoveryCmd);
+       if (rc != 0)
+       {
+               /*
+                * If the failure was due to any sort of signal, it's best to punt and
+                * abort recovery. See also detailed comments on signals in
+                * RestoreArchivedFile().
+                */
+               signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
+
+               ereport((signaled && failOnSignal) ? FATAL : WARNING,
+               /*------
+                  translator: First %s represents a recovery.conf parameter name like
+                 "recovery_end_command", and the 2nd is the value of that parameter. */
+                               (errmsg("%s \"%s\": return code %d", commandName,
+                                               command, rc)));
+       }
+}
+
+
+/*
+ * XLogArchiveNotify
+ *
+ * Tell the archiver that a file is ready to be archived.
+ *
+ * The notification is the mere existence of an empty status file whose name
+ * carries the message, e.g. 0000000100000001000000C6.ready.  The archiver
+ * then knows to archive XLOGDIR/0000000100000001000000C6 and, once that is
+ * complete, renames the status file to 0000000100000001000000C6.done.
+ *
+ * Failure to create or close the status file is only logged; in that case
+ * the archiver is not signalled.
+ */
+void
+XLogArchiveNotify(const char *xlog)
+{
+       char            readyPath[MAXPGPATH];
+       FILE       *fp;
+
+       /* the marker is an otherwise empty file called <XLOG>.ready */
+       StatusFilePath(readyPath, xlog, ".ready");
+
+       fp = AllocateFile(readyPath, "w");
+       if (!fp)
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                                errmsg("could not create archive status file \"%s\": %m",
+                                               readyPath)));
+               return;
+       }
+
+       /* nothing is written to it, but closing can still fail */
+       if (FreeFile(fp) != 0)
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                                errmsg("could not write archive status file \"%s\": %m",
+                                               readyPath)));
+               return;
+       }
+
+       /* Wake the archiver, provided there is a postmaster to relay the signal */
+       if (IsUnderPostmaster)
+               SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
+}
+
+/*
+ * XLogArchiveNotifySeg
+ *
+ * Same as XLogArchiveNotify, but takes a segment number: the file name is
+ * built from the segment number and the current ThisTimeLineID.
+ */
+void
+XLogArchiveNotifySeg(XLogSegNo segno)
+{
+       char            segFileName[MAXFNAMELEN];
+
+       XLogFileName(segFileName, ThisTimeLineID, segno);
+       XLogArchiveNotify(segFileName);
+}
+
+/*
+ * XLogArchiveCheckDone
+ *
+ * Called when we are ready to delete or recycle an old XLOG segment file or
+ * backup history file.  Returns true if it is okay to delete the file.
+ * Returns false if it is not yet time, after making sure that a .ready
+ * file exists for it.
+ *
+ * In detail: with archiving off the answer is always true.  Otherwise, if
+ * <XLOG>.done exists return true; else if <XLOG>.ready exists return false;
+ * else create <XLOG>.ready and return false.
+ *
+ * Doing it this way means that if the original attempt to create
+ * <XLOG>.ready failed, it gets retried during subsequent checkpoints.
+ */
+bool
+XLogArchiveCheckDone(const char *xlog)
+{
+       char            donePath[MAXPGPATH];
+       char            readyPath[MAXPGPATH];
+       struct stat st;
+
+       /* With archiving off, nothing stands in the way of removal */
+       if (!XLogArchivingActive())
+               return true;
+
+       StatusFilePath(donePath, xlog, ".done");
+       StatusFilePath(readyPath, xlog, ".ready");
+
+       /* .done present: the archiver has finished with this file */
+       if (stat(donePath, &st) == 0)
+               return true;
+
+       /* .ready present: the archiver still has work to do on it */
+       if (stat(readyPath, &st) == 0)
+               return false;
+
+       /* The archiver may have finished between the two checks; look again */
+       if (stat(donePath, &st) == 0)
+               return true;
+
+       /* No status file at all, so retry creating the .ready file */
+       XLogArchiveNotify(xlog);
+       return false;
+}
+
+/*
+ * XLogArchiveIsBusy
+ *
+ * Report whether an XLOG segment file is still unarchived.
+ *
+ * This is nearly, but not exactly, the inverse of XLogArchiveCheckDone.
+ * First, it is not our job to recreate the .ready file.  Second, a file
+ * that is already gone counts as not busy; that check handles the race
+ * where a checkpoint has already deleted the no-longer-needed file.
+ */
+bool
+XLogArchiveIsBusy(const char *xlog)
+{
+       char            donePath[MAXPGPATH];
+       char            readyPath[MAXPGPATH];
+       char            segPath[MAXPGPATH];
+       struct stat st;
+
+       StatusFilePath(donePath, xlog, ".done");
+       StatusFilePath(readyPath, xlog, ".ready");
+       snprintf(segPath, MAXPGPATH, XLOGDIR "/%s", xlog);
+
+       /* .done present: the archiver has finished with this file */
+       if (stat(donePath, &st) == 0)
+               return false;
+
+       /* .ready present: the archiver still has work to do on it */
+       if (stat(readyPath, &st) == 0)
+               return true;
+
+       /* The archiver may have finished between the two checks; look again */
+       if (stat(donePath, &st) == 0)
+               return false;
+
+       /*
+        * No status file is visible.  If the WAL file itself no longer exists,
+        * a checkpoint has removed it, which implies it was already archived.
+        * In every other case (file present, or stat failing for a reason other
+        * than ENOENT) report it as busy.
+        */
+       return stat(segPath, &st) == 0 || errno != ENOENT;
+}
+
+/*
+ * XLogArchiveCleanup
+ *
+ * Remove the archive status file(s) belonging to one xlog segment.
+ *
+ * Failures of unlink() are ignored, so a status file that does not exist
+ * is not treated as a problem.
+ */
+void
+XLogArchiveCleanup(const char *xlog)
+{
+       char            statusPath[MAXPGPATH];
+
+       /* First the .done file; should a failure be reported? */
+       StatusFilePath(statusPath, xlog, ".done");
+       unlink(statusPath);
+
+       /*
+        * Then the .ready file, which normally shouldn't be present; again,
+        * should a failure be reported?
+        */
+       StatusFilePath(statusPath, xlog, ".ready");
+       unlink(statusPath);
+}
diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h
new file mode 100644 (file)
index 0000000..f2a7658
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * timeline.h
+ *
+ * Functions for reading and writing timeline history files.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/timeline.h
+ */
+#ifndef TIMELINE_H
+#define TIMELINE_H
+
+#include "access/xlogdefs.h"
+#include "nodes/pg_list.h"
+
+extern List *readTimeLineHistory(TimeLineID targetTLI);
+extern bool existsTimeLineHistory(TimeLineID probeTLI);
+extern TimeLineID findNewestTimeLine(TimeLineID startTLI);
+extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
+                                        TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason);
+
+#endif   /* TIMELINE_H */
index b5bfb7b407282373a7fbe162fcb91733700cf921..2c66b2feb8afc470113e55b56b71a9237ad0816a 100644 (file)
@@ -229,6 +229,29 @@ extern const RmgrData RmgrTable[];
 extern pg_time_t GetLastSegSwitchTime(void);
 extern XLogRecPtr RequestXLogSwitch(void);
 
+extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli);
+
+/*
+ * Exported for the functions in timeline.c and xlogarchive.c.  Only valid
+ * in the startup process.
+ */
+extern bool InArchiveRecovery;
+extern bool StandbyMode;
+extern char *recoveryRestoreCommand;
+
+/*
+ * Prototypes for functions in xlogarchive.c
+ */
+extern bool RestoreArchivedFile(char *path, const char *xlogfname,
+                                       const char *recovername, off_t expectedSize);
+/* failOnSignal: report FATAL rather than WARNING if the command dies on a signal */
+extern void ExecuteRecoveryCommand(char *command, char *commandName,
+                                          bool failOnSignal);
+extern void XLogArchiveNotify(const char *xlog);
+extern void XLogArchiveNotifySeg(XLogSegNo segno);
+extern bool XLogArchiveCheckDone(const char *xlog);
+extern bool XLogArchiveIsBusy(const char *xlog);
+extern void XLogArchiveCleanup(const char *xlog);
+
 /*
  * These aren't in xlog.h because I'd rather not include fmgr.h there.
  */