From ab94a6978b84ab385f2f82e51a32463a05b0d0cb Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki@enterprisedb.com>
Date: Mon, 9 Feb 2009 14:54:41 +0200
Subject: [PATCH] Add IsRecoveryProcessingMode() quick exits to XLogNeedsFlush,
 XLogAsyncCommitFlush and XLogBackgroundFlush. Fix restore command invocation
 so that fast shutdown requests are not lost. Update minRecoveryPoint in
 CreateRestartPoint when we can't create a restart point.

---
 src/backend/access/transam/xlog.c | 121 +++++++++++++++++++++---------
 1 file changed, 84 insertions(+), 37 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 0ace629e8f..87e4551b0e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -429,6 +429,11 @@ static bool InRedo = false;
  * Flag set by interrupt handlers for later service in the redo loop.
  */
 static volatile sig_atomic_t shutdown_requested = false;
+/*
+ * Flag set when executing a restore command, to tell SIGTERM signal handler
+ * that it's safe to just proc_exit(0).
+ */
+static volatile sig_atomic_t in_restore_command = false;
 
 
 static void XLogArchiveNotify(const char *xlog);
@@ -460,7 +465,7 @@ static void PreallocXlogFiles(XLogRecPtr endptr);
 static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
 static void ValidateXLOGDirectoryStructure(void);
 static void CleanupBackupHistory(void);
-static void UpdateMinRecoveryPoint(XLogRecPtr lsn);
+static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
 static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode);
 static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
 static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
@@ -1766,14 +1771,16 @@ XLogSetAsyncCommitLSN(XLogRecPtr asyncCommitLSN)
  * Advance minRecoveryPoint in control file.
  *
  * If we crash during recovery, we must reach this point again before the
- * database is consistent. If minRecoveryPoint is already greater than or
- * equal to 'lsn', it is not updated.
+ * database is consistent. 
+ * 
+ * If 'force' is true, 'lsn' argument is ignored. Otherwise, minRecoveryPoint
+ * is only updated if it's not already greater than or equal to 'lsn'.
  */
 static void
-UpdateMinRecoveryPoint(XLogRecPtr lsn)
+UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
 {
 	/* Quick check using our local copy of the variable */
-	if (!updateMinRecoveryPoint || XLByteLE(lsn, minRecoveryPoint))
+	if (!updateMinRecoveryPoint || (!force && XLByteLE(lsn, minRecoveryPoint)))
 		return;
 
 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
@@ -1787,10 +1794,11 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn)
 	 */
 	if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
 		updateMinRecoveryPoint = false;
-	else if (XLByteLT(minRecoveryPoint, lsn))
+	else if (force || XLByteLT(minRecoveryPoint, lsn))
 	{
 		/* use volatile pointer to prevent code rearrangement */
 		volatile XLogCtlData *xlogctl = XLogCtl;
+		XLogRecPtr newMinRecoveryPoint;
 
 		/*
 		 * To avoid having to update the control file too often, we update
@@ -1798,12 +1806,16 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn)
 		 * would suffice for correctness.
 		 */
 		SpinLockAcquire(&xlogctl->info_lck);
-		minRecoveryPoint = xlogctl->replayEndRecPtr;
+		newMinRecoveryPoint = xlogctl->replayEndRecPtr;
 		SpinLockRelease(&xlogctl->info_lck);
 
 		/* update control file */
-		ControlFile->minRecoveryPoint = minRecoveryPoint;
-		UpdateControlFile();
+		if (XLByteLT(ControlFile->minRecoveryPoint, newMinRecoveryPoint))
+		{
+			ControlFile->minRecoveryPoint = newMinRecoveryPoint;
+			UpdateControlFile();
+			minRecoveryPoint = newMinRecoveryPoint;
+		}
 
 		elog(DEBUG2, "updated min recovery point to %X/%X",
 			 minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff);
@@ -1829,7 +1841,7 @@ XLogFlush(XLogRecPtr record)
 	 */
 	if (IsRecoveryProcessingMode())
 	{
-		UpdateMinRecoveryPoint(record);
+		UpdateMinRecoveryPoint(record, false);
 		return;
 	}
 
@@ -1957,6 +1969,10 @@ XLogBackgroundFlush(void)
 	XLogRecPtr	WriteRqstPtr;
 	bool		flexible = true;
 
+	/* XLOG doesn't need flushing during recovery */
+	if (IsRecoveryProcessingMode())
+		return;
+
 	/* read LogwrtResult and update local state */
 	{
 		/* use volatile pointer to prevent code rearrangement */
@@ -2028,6 +2044,10 @@ XLogAsyncCommitFlush(void)
 	/* use volatile pointer to prevent code rearrangement */
 	volatile XLogCtlData *xlogctl = XLogCtl;
 
+	/* There are no asynchronously committed transactions during recovery */
+	if (IsRecoveryProcessingMode())
+		return;
+
 	SpinLockAcquire(&xlogctl->info_lck);
 	WriteRqstPtr = xlogctl->asyncCommitLSN;
 	SpinLockRelease(&xlogctl->info_lck);
@@ -2044,6 +2064,10 @@ XLogAsyncCommitFlush(void)
 bool
 XLogNeedsFlush(XLogRecPtr record)
 {
+	/* XLOG doesn't need flushing during recovery */
+	if (IsRecoveryProcessingMode())
+		return false;
+
 	/* Quick exit if already known flushed */
 	if (XLByteLE(record, LogwrtResult.Flush))
 		return false;
@@ -2718,10 +2742,23 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 			(errmsg_internal("executing restore command \"%s\"",
 							 xlogRestoreCmd)));
 
+
+	/*
+	 * Set in_restore_command to tell the SIGTERM handler that it should just
+	 * exit. We know that we're at a safe point to do that. Also check whether
+	 * the signal had already arrived before the flag was set.
+	 */
+	in_restore_command = true;
+	if (shutdown_requested)
+		proc_exit(0);
+
 	/*
 	 * Copy xlog from archival storage to XLOGDIR
 	 */
 	rc = system(xlogRestoreCmd);
+
+	in_restore_command = false;
+
 	if (rc == 0)
 	{
 		/*
@@ -2774,25 +2811,22 @@ RestoreArchivedFile(char *path, const char *xlogfname,
 	 * assume that recovery is complete and start up the database!) It's
 	 * essential to abort on child SIGINT and SIGQUIT, because per spec
 	 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
-	 * those it's a good bet we should have gotten it too.  Aborting on other
-	 * signals such as SIGTERM seems a good idea as well.
+	 * those it's a good bet we should have gotten it too.
 	 *
-	 * However, if we were requested to terminate, we don't really care what
-	 * happened to the restore command, so we just exit cleanly. In fact,
-	 * the restore command most likely received the SIGTERM too, and we don't
-	 * want to complain about that.
+	 * On SIGTERM, assume we have received a fast shutdown request, and exit
+	 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
+	 * child process. If we receive it first, the signal handler will call
+	 * proc_exit(0), otherwise we do it here. If we received SIGTERM for any
+	 * other reason, postmaster will perform an immediate shutdown when it
+	 * sees us exiting unexpectedly.
 	 *
 	 * Per the Single Unix Spec, shells report exit status > 128 when a called
 	 * command died on a signal.  Also, 126 and 127 are used to report
 	 * problems such as an unfindable command; treat those as fatal errors
 	 * too.
 	 */
-	if (shutdown_requested && InRedo)
-	{
-		/* XXX: Is EndRecPtr always the right value here? */
-		UpdateMinRecoveryPoint(EndRecPtr);
+	if (WTERMSIG(rc) == SIGTERM)
 		proc_exit(0);
-	}
 
 	signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
 
@@ -5335,10 +5369,7 @@ StartupXLOG(void)
 				 * recovery.
 				 */
 				if (shutdown_requested)
-				{
-					UpdateMinRecoveryPoint(ReadRecPtr);
 					proc_exit(0);
-				}
 
 				/*
 				 * Have we reached our safe starting point? If so, we can
@@ -6437,28 +6468,41 @@ CreateRestartPoint(int flags)
 	memcpy(&lastCheckPoint, &XLogCtl->lastCheckPoint, sizeof(CheckPoint));
 	SpinLockRelease(&xlogctl->info_lck);
 
-	/*
-	 * If the last checkpoint record we've replayed is already our last
-	 * restartpoint, we're done.
+	/* 
+	 * Check that we're still in recovery mode. It's ok if we exit recovery
+	 * mode after this check, the restart point is valid anyway.
 	 */
-	if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
-		XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo))
+	if (!IsRecoveryProcessingMode())
 	{
 		ereport(DEBUG2,
-				(errmsg("skipping restartpoint, already performed at %X/%X",
-						lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
+				(errmsg("skipping restartpoint, recovery has already ended")));
 		LWLockRelease(CheckpointLock);
 		return false;
 	}
 
-	/* 
-	 * Check that we're still in recovery mode. It's ok if we exit recovery
-	 * mode after this check, the restart point is valid anyway.
+	/*
+	 * If the last checkpoint record we've replayed is already our last
+	 * restartpoint, we can't perform a new restart point. We still update
+	 * minRecoveryPoint in that case, so that if this is a shutdown restart
+	 * point, we won't start up earlier than before. That's not strictly
+	 * necessary, but when we get hot standby capability, it would be rather
+	 * weird if the database opened up for read-only connections at a
+	 * point-in-time before the last shutdown. Such time travel is still
+	 * possible in case of immediate shutdown, though.
+	 *
+	 * We don't explicitly advance minRecoveryPoint when we do create a
+	 * restartpoint. It's assumed that flushing the buffers will do that
+	 * as a side-effect.
 	 */
-	if (!IsRecoveryProcessingMode())
+	if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) ||
+		XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo))
 	{
+		XLogRecPtr InvalidXLogRecPtr = {0, 0};
 		ereport(DEBUG2,
-				(errmsg("skipping restartpoint, recovery has already ended")));
+				(errmsg("skipping restartpoint, already performed at %X/%X",
+						lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
+
+		UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
 		LWLockRelease(CheckpointLock);
 		return false;
 	}
@@ -7599,7 +7643,10 @@ startupproc_quickdie(SIGNAL_ARGS)
 static void
 StartupProcShutdownHandler(SIGNAL_ARGS)
 {
-	shutdown_requested = true;
+	if (in_restore_command)
+		proc_exit(0);
+	else
+		shutdown_requested = true;
 }
 
 /* Main entry point for startup process */
-- 
2.39.5