From c4cbbe53ee2caeede279c4da936fdc3369e1fe6e Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki@enterprisedb.com>
Date: Mon, 9 Feb 2009 10:13:19 +0200
Subject: [PATCH] If bgwriter skips a restartpoint because we haven't seen any
 new checkpoint records since last restartpoint, try again in 15 s, instead of
 checkpoint_timeout. Plus some comment changes

---
 src/backend/access/transam/xlog.c | 25 ++++++++++++-------
 src/backend/postmaster/bgwriter.c | 40 ++++++++++++++++++++++---------
 src/include/access/xlog.h         |  2 +-
 3 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1772f19301..bcc54eb174 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -338,7 +338,7 @@ typedef struct XLogCtlData
 	 * here. It's used by the background writer when it wants to create
 	 * a restartpoint.
 	 *
-	 * is info_lck spinlock a bit too light-weight to protect these?
+	 * Protected by info_lck. XXX Is a spinlock too light-weight for these?
 	 */
 	XLogRecPtr	lastCheckPointRecPtr;
 	CheckPoint	lastCheckPoint;
@@ -5294,7 +5294,11 @@ StartupXLOG(void)
 						minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff)));
 
 			/*
-			 * Let postmaster know we've started redo now.
+			 * Let postmaster know we've started redo now, so that it can
+			 * launch bgwriter.  We don't bother during crash recovery; we can
+			 * only perform restartpoints during archive recovery anyway. And
+			 * we'd like to keep crash recovery simple, to avoid introducing
+			 * bugs into that codepath that could stop you from recovering.
 			 *
 			 * After this point, we can no longer assume that there's no other
 			 * processes running concurrently.
@@ -5327,13 +5331,11 @@ StartupXLOG(void)
 #endif
 
 				/*
-				 * Process any requests or signals received recently.
+				 * Check if we were requested to exit without finishing
+				 * recovery.
 				 */
 				if (shutdown_requested)
 				{
-					/*
-					 * We were requested to exit without finishing recovery.
-					 */
 					UpdateMinRecoveryPoint(ReadRecPtr);
 					proc_exit(0);
 				}
@@ -6391,8 +6393,12 @@ RecoveryRestartPoint(const CheckPoint *checkPoint)
  * This is similar to CreateCheckPoint, but is used during WAL recovery
  * to establish a point from which recovery can roll forward without
  * replaying the entire recovery log.
+ *
+ * Returns true if a new restartpoint was established. We can only establish
+ * a restartpoint if we have replayed a checkpoint record since last
+ * restartpoint.
  */
-void
+bool
 CreateRestartPoint(int flags)
 {
 	XLogRecPtr lastCheckPointRecPtr;
@@ -6423,7 +6429,7 @@ CreateRestartPoint(int flags)
 				(errmsg("skipping restartpoint, already performed at %X/%X",
 						lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff)));
 		LWLockRelease(CheckpointLock);
-		return;
+		return false;
 	}
 
 	/* 
@@ -6435,7 +6441,7 @@ CreateRestartPoint(int flags)
 		ereport(DEBUG2,
 				(errmsg("skipping restartpoint, recovery has already ended")));
 		LWLockRelease(CheckpointLock);
-		return;
+		return false;
 	}
 
 	if (log_checkpoints)
@@ -6482,6 +6488,7 @@ CreateRestartPoint(int flags)
 					timestamptz_to_str(recoveryLastXTime))));
 
 	LWLockRelease(CheckpointLock);
+	return true;
 }
 
 /*
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 58b982dac1..c1f29189ea 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -439,6 +439,8 @@ BackgroundWriterMain(void)
 		 */
 		if (do_checkpoint)
 		{
+			bool	ckpt_performed = false;
+
 			/* use volatile pointer to prevent code rearrangement */
 			volatile BgWriterShmemStruct *bgs = BgWriterShmem;
 
@@ -481,9 +483,12 @@ BackgroundWriterMain(void)
 			 * Do the checkpoint.
 			 */
 			if (!BgWriterRecoveryMode)
+			{
 				CreateCheckPoint(flags);
+				ckpt_performed = true;
+			}
 			else
-				CreateRestartPoint(flags);
+				ckpt_performed = CreateRestartPoint(flags);
 
 			/*
 			 * After any checkpoint, close all smgr files.	This is so we
@@ -494,18 +499,31 @@ BackgroundWriterMain(void)
 			/*
 			 * Indicate checkpoint completion to any waiting backends.
 			 */
-			SpinLockAcquire(&bgs->ckpt_lck);
-			bgs->ckpt_done = bgs->ckpt_started;
-			SpinLockRelease(&bgs->ckpt_lck);
+			if (ckpt_performed)
+			{
+				SpinLockAcquire(&bgs->ckpt_lck);
+				bgs->ckpt_done = bgs->ckpt_started;
+				SpinLockRelease(&bgs->ckpt_lck);
+
+				/*
+				 * Note we record the checkpoint start time not end time as
+				 * last_checkpoint_time.  This is so that time-driven
+				 * checkpoints happen at a predictable spacing.
+				 */
+				last_checkpoint_time = now;
+			}
+			else
+			{
+				/*
+				 * We were not able to perform the restartpoint (checkpoints
+				 * throw an ERROR in case of error).  Most likely because we
+				 * have not received a new checkpoint WAL record since the
+				 * last restartpoint. Try again in 15 s.
+				 */
+				last_checkpoint_time = now - CheckPointTimeout - 15;
+			}
 
 			ckpt_active = false;
-
-			/*
-			 * Note we record the checkpoint start time not end time as
-			 * last_checkpoint_time.  This is so that time-driven checkpoints
-			 * happen at a predictable spacing.
-			 */
-			last_checkpoint_time = now;
 		}
 		else
 			BgBufferSync();
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 2a9ed7078e..d0155bd2de 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -218,7 +218,7 @@ extern void StartupXLOG(void);
 extern void ShutdownXLOG(int code, Datum arg);
 extern void InitXLOGAccess(void);
 extern void CreateCheckPoint(int flags);
-extern void CreateRestartPoint(int flags);
+extern bool CreateRestartPoint(int flags);
 extern void XLogPutNextOid(Oid nextOid);
 extern XLogRecPtr GetRedoRecPtr(void);
 extern XLogRecPtr GetInsertRecPtr(void);
-- 
2.39.5