From ab94a6978b84ab385f2f82e51a32463a05b0d0cb Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 9 Feb 2009 14:54:41 +0200 Subject: [PATCH] Add IsRecoveryProcessingMode() quick exits to XLogNeedsFlush, XLogAsyncCommitFlush and XLogBackgroundFlush. Fix restore command invocation so that fast shutdown requests are not lost. Update minRecoveryPoint in CreateRestartPoint when we can't create a restart point. --- src/backend/access/transam/xlog.c | 121 +++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 37 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 0ace629e8f..87e4551b0e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -429,6 +429,11 @@ static bool InRedo = false; * Flag set by interrupt handlers for later service in the redo loop. */ static volatile sig_atomic_t shutdown_requested = false; +/* + * Flag set when executing a restore command, to tell SIGTERM signal handler + * that it's safe to just proc_exit(0). + */ +static volatile sig_atomic_t in_restore_command = false; static void XLogArchiveNotify(const char *xlog); @@ -460,7 +465,7 @@ static void PreallocXlogFiles(XLogRecPtr endptr); static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr); static void ValidateXLOGDirectoryStructure(void); static void CleanupBackupHistory(void); -static void UpdateMinRecoveryPoint(XLogRecPtr lsn); +static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force); static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode); static bool ValidXLOGHeader(XLogPageHeader hdr, int emode); static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt); @@ -1766,14 +1771,16 @@ XLogSetAsyncCommitLSN(XLogRecPtr asyncCommitLSN) * Advance minRecoveryPoint in control file. * * If we crash during recovery, we must reach this point again before the - * database is consistent. 
If minRecoveryPoint is already greater than or - * equal to 'lsn', it is not updated. + * database is consistent. + * + * If 'force' is true, 'lsn' argument is ignored. Otherwise, minRecoveryPoint + * is only updated if it's not already greater than or equal to 'lsn'. */ static void -UpdateMinRecoveryPoint(XLogRecPtr lsn) +UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force) { /* Quick check using our local copy of the variable */ - if (!updateMinRecoveryPoint || XLByteLE(lsn, minRecoveryPoint)) + if (!updateMinRecoveryPoint || (!force && XLByteLE(lsn, minRecoveryPoint))) return; LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); @@ -1787,10 +1794,11 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn) */ if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0) updateMinRecoveryPoint = false; - else if (XLByteLT(minRecoveryPoint, lsn)) + else if (force || XLByteLT(minRecoveryPoint, lsn)) { /* use volatile pointer to prevent code rearrangement */ volatile XLogCtlData *xlogctl = XLogCtl; + XLogRecPtr newMinRecoveryPoint; /* * To avoid having to update the control file too often, we update @@ -1798,12 +1806,16 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn) * would suffice for correctness. 
*/ SpinLockAcquire(&xlogctl->info_lck); - minRecoveryPoint = xlogctl->replayEndRecPtr; + newMinRecoveryPoint = xlogctl->replayEndRecPtr; SpinLockRelease(&xlogctl->info_lck); /* update control file */ - ControlFile->minRecoveryPoint = minRecoveryPoint; - UpdateControlFile(); + if (XLByteLT(ControlFile->minRecoveryPoint, newMinRecoveryPoint)) + { + ControlFile->minRecoveryPoint = newMinRecoveryPoint; + UpdateControlFile(); + minRecoveryPoint = newMinRecoveryPoint; + } elog(DEBUG2, "updated min recovery point to %X/%X", minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff); @@ -1829,7 +1841,7 @@ XLogFlush(XLogRecPtr record) */ if (IsRecoveryProcessingMode()) { - UpdateMinRecoveryPoint(record); + UpdateMinRecoveryPoint(record, false); return; } @@ -1957,6 +1969,10 @@ XLogBackgroundFlush(void) XLogRecPtr WriteRqstPtr; bool flexible = true; + /* XLOG doesn't need flushing during recovery */ + if (IsRecoveryProcessingMode()) + return; + /* read LogwrtResult and update local state */ { /* use volatile pointer to prevent code rearrangement */ @@ -2028,6 +2044,10 @@ XLogAsyncCommitFlush(void) /* use volatile pointer to prevent code rearrangement */ volatile XLogCtlData *xlogctl = XLogCtl; + /* There are no asynchronously committed transactions during recovery */ + if (IsRecoveryProcessingMode()) + return; + SpinLockAcquire(&xlogctl->info_lck); WriteRqstPtr = xlogctl->asyncCommitLSN; SpinLockRelease(&xlogctl->info_lck); @@ -2044,6 +2064,10 @@ XLogAsyncCommitFlush(void) bool XLogNeedsFlush(XLogRecPtr record) { + /* XLOG doesn't need flushing during recovery */ + if (IsRecoveryProcessingMode()) + return false; + /* Quick exit if already known flushed */ if (XLByteLE(record, LogwrtResult.Flush)) return false; @@ -2718,10 +2742,23 @@ RestoreArchivedFile(char *path, const char *xlogfname, (errmsg_internal("executing restore command \"%s\"", xlogRestoreCmd))); + + /* + * Set in_restore_command to indicate that we should just exit on + * SIGTERM. We know that we're at a safe point to do that. 
Check + * if we had already received the signal. + */ + in_restore_command = true; + if (shutdown_requested) + proc_exit(0); + /* * Copy xlog from archival storage to XLOGDIR */ rc = system(xlogRestoreCmd); + + in_restore_command = false; + if (rc == 0) { /* @@ -2774,25 +2811,22 @@ RestoreArchivedFile(char *path, const char *xlogfname, * assume that recovery is complete and start up the database!) It's * essential to abort on child SIGINT and SIGQUIT, because per spec * system() ignores SIGINT and SIGQUIT while waiting; if we see one of - * those it's a good bet we should have gotten it too. Aborting on other - * signals such as SIGTERM seems a good idea as well. + * those it's a good bet we should have gotten it too. * - * However, if we were requested to terminate, we don't really care what - * happened to the restore command, so we just exit cleanly. In fact, - * the restore command most likely received the SIGTERM too, and we don't - * want to complain about that. + * On SIGTERM, assume we have received a fast shutdown request, and exit + * cleanly. It's pure chance whether we receive the SIGTERM first, or the + * child process. If we receive it first, the signal handler will call + * proc_exit(0), otherwise we do it here. If we received SIGTERM for any + * other reason, postmaster will perform an immediate shutdown when it + * sees us exiting unexpectedly. * * Per the Single Unix Spec, shells report exit status > 128 when a called * command died on a signal. Also, 126 and 127 are used to report * problems such as an unfindable command; treat those as fatal errors * too. */ - if (shutdown_requested && InRedo) - { - /* XXX: Is EndRecPtr always the right value here? */ - UpdateMinRecoveryPoint(EndRecPtr); + if (WTERMSIG(rc) == SIGTERM) proc_exit(0); - } signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; @@ -5335,10 +5369,7 @@ StartupXLOG(void) * recovery. 
*/ if (shutdown_requested) - { - UpdateMinRecoveryPoint(ReadRecPtr); proc_exit(0); - } /* * Have we reached our safe starting point? If so, we can @@ -6437,28 +6468,41 @@ CreateRestartPoint(int flags) memcpy(&lastCheckPoint, &XLogCtl->lastCheckPoint, sizeof(CheckPoint)); SpinLockRelease(&xlogctl->info_lck); - /* - * If the last checkpoint record we've replayed is already our last - * restartpoint, we're done. + /* + * Check that we're still in recovery mode. It's ok if we exit recovery + * mode after this check, the restart point is valid anyway. */ - if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) || - XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo)) + if (!IsRecoveryProcessingMode()) { ereport(DEBUG2, - (errmsg("skipping restartpoint, already performed at %X/%X", - lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff))); + (errmsg("skipping restartpoint, recovery has already ended"))); LWLockRelease(CheckpointLock); return false; } - /* - * Check that we're still in recovery mode. It's ok if we exit recovery - * mode after this check, the restart point is valid anyway. + /* + * If the last checkpoint record we've replayed is already our last + * restartpoint, we can't perform a new restart point. We still update + * minRecoveryPoint in that case, so that if this is a shutdown restart + * point, we won't start up earlier than before. That's not strictly + * necessary, but when we get hot standby capability, it would be rather + * weird if the database opened up for read-only connections at a + * point-in-time before the last shutdown. Such time travel is still + * possible in case of immediate shutdown, though. + * + * We don't explicitly advance minRecoveryPoint when we do create a + * restartpoint. It's assumed that flushing the buffers will do that + * as a side-effect. 
*/ - if (!IsRecoveryProcessingMode()) + if (XLogRecPtrIsInvalid(lastCheckPointRecPtr) || + XLByteLE(lastCheckPoint.redo, ControlFile->checkPointCopy.redo)) { + XLogRecPtr InvalidXLogRecPtr = {0, 0}; ereport(DEBUG2, - (errmsg("skipping restartpoint, recovery has already ended"))); + (errmsg("skipping restartpoint, already performed at %X/%X", + lastCheckPoint.redo.xlogid, lastCheckPoint.redo.xrecoff))); + + UpdateMinRecoveryPoint(InvalidXLogRecPtr, true); LWLockRelease(CheckpointLock); return false; } @@ -7599,7 +7643,10 @@ startupproc_quickdie(SIGNAL_ARGS) static void StartupProcShutdownHandler(SIGNAL_ARGS) { - shutdown_requested = true; + if (in_restore_command) + proc_exit(0); + else + shutdown_requested = true; } /* Main entry point for startup process */ -- 2.39.5