Prefer standby promotion over recovery pause.
author Fujii Masao <fujii@postgresql.org>
Tue, 24 Mar 2020 03:46:48 +0000 (12:46 +0900)
committer Fujii Masao <fujii@postgresql.org>
Tue, 24 Mar 2020 03:46:48 +0000 (12:46 +0900)
Previously if a promotion was triggered while recovery was paused,
the paused state continued. Also recovery could be paused by executing
pg_wal_replay_pause() even while a promotion was ongoing. That is,
recovery pause had higher priority than a standby promotion.
But this behavior was not desirable because most users basically wanted
the recovery to complete as soon as possible and the server to become
the master when they requested a promotion.

This commit changes recovery so that it prefers a promotion over
recovery pause. That is, if a promotion is triggered while recovery
is paused, the paused state ends and a promotion continues. Also
this commit makes recovery pause functions like pg_wal_replay_pause()
throw an error if they are executed while a promotion is ongoing.

Internally, this commit adds a new function, PromoteIsTriggered(),
that returns true if a promotion is triggered. Since the name of
this function and the existing function IsPromoteTriggered() are
confusingly similar, the commit renames IsPromoteTriggered()
to IsPromoteSignaled(), which is a more appropriate name.

Author: Fujii Masao
Reviewed-by: Atsushi Torikoshi, Sergei Kornilov
Discussion: https://postgr.es/m/00c194b2-dbbb-2e8a-5b39-13f14048ef0a@oss.nttdata.com

doc/src/sgml/config.sgml
doc/src/sgml/func.sgml
src/backend/access/transam/xlog.c
src/backend/access/transam/xlogfuncs.c
src/backend/postmaster/startup.c
src/include/access/xlog.h
src/include/postmaster/startup.h

index 70854ae2986aa68bde00d8c6503426c185e2ebb0..355b408b0a672c46c21b09a6944d09f1df2c531f 100644 (file)
@@ -3590,6 +3590,9 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
         This setting has no effect if no recovery target is set.
         If <xref linkend="guc-hot-standby"/> is not enabled, a setting of
         <literal>pause</literal> will act the same as <literal>shutdown</literal>.
+        If the recovery target is reached while a promotion is ongoing,
+        a setting of <literal>pause</literal> will act the same as
+        <literal>promote</literal>.
        </para>
        <para>
         In any case, if a recovery target is configured but the archive
index 0057a1775704c9d3c904c2574edb05c354e435b3..7a0bb0c70a18c54d516dab33b839117582fc0c84 100644 (file)
@@ -20177,6 +20177,13 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
     recovery is resumed.
    </para>
 
+   <para>
+    <function>pg_wal_replay_pause</function> and
+    <function>pg_wal_replay_resume</function> cannot be executed while
+    a promotion is ongoing. If a promotion is triggered while recovery
+    is paused, the paused state ends and a promotion continues.
+   </para>
+
    <para>
     If streaming replication is disabled, the paused state may continue
     indefinitely without problem. While streaming replication is in
index 64860f12f56c62060d2f073939c2cb133ffdb177..7621fc05e24e3fc574add629fa1e0d5a3745d60d 100644 (file)
@@ -229,6 +229,12 @@ static bool LocalRecoveryInProgress = true;
  */
 static bool LocalHotStandbyActive = false;
 
+/*
+ * Local copy of SharedPromoteIsTriggered variable. False actually means "not
+ * known, need to check the shared state".
+ */
+static bool LocalPromoteIsTriggered = false;
+
 /*
  * Local state for XLogInsertAllowed():
  *     1: unconditionally allowed to insert XLOG
@@ -654,6 +660,12 @@ typedef struct XLogCtlData
     */
    bool        SharedHotStandbyActive;
 
+   /*
+    * SharedPromoteIsTriggered indicates if a standby promotion has been
+    * triggered.  Protected by info_lck.
+    */
+   bool        SharedPromoteIsTriggered;
+
    /*
     * WalWriterSleeping indicates whether the WAL writer is currently in
     * low-power mode (and hence should be nudged if an async commit occurs).
@@ -912,6 +924,7 @@ static void InitControlFile(uint64 sysidentifier);
 static void WriteControlFile(void);
 static void ReadControlFile(void);
 static char *str_time(pg_time_t tnow);
+static void SetPromoteIsTriggered(void);
 static bool CheckForStandbyTrigger(void);
 
 #ifdef WAL_DEBUG
@@ -5112,6 +5125,7 @@ XLOGShmemInit(void)
    XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
    XLogCtl->SharedRecoveryInProgress = true;
    XLogCtl->SharedHotStandbyActive = false;
+   XLogCtl->SharedPromoteIsTriggered = false;
    XLogCtl->WalWriterSleeping = false;
 
    SpinLockInit(&XLogCtl->Insert.insertpos_lck);
@@ -5940,16 +5954,22 @@ recoveryPausesHere(void)
    if (!LocalHotStandbyActive)
        return;
 
+   /* Don't pause after standby promotion has been triggered */
+   if (LocalPromoteIsTriggered)
+       return;
+
    ereport(LOG,
            (errmsg("recovery has paused"),
             errhint("Execute pg_wal_replay_resume() to continue.")));
 
    while (RecoveryIsPaused())
    {
+       HandleStartupProcInterrupts();
+       if (CheckForStandbyTrigger())
+           return;
        pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE);
        pg_usleep(1000000L);    /* 1000 ms */
        pgstat_report_wait_end();
-       HandleStartupProcInterrupts();
    }
 }
 
@@ -12277,6 +12297,40 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
    return emode;
 }
 
+/*
+ * Has a standby promotion already been triggered?
+ *
+ * Unlike CheckForStandbyTrigger(), this works in any process
+ * that's connected to shared memory.
+ */
+bool
+PromoteIsTriggered(void)
+{
+   /*
+    * We check shared state each time only until a standby promotion is
+    * triggered. We can't trigger a promotion again, so there's no need to
+    * keep checking after the shared variable has once been seen true.
+    */
+   if (LocalPromoteIsTriggered)
+       return true;
+
+   SpinLockAcquire(&XLogCtl->info_lck);
+   LocalPromoteIsTriggered = XLogCtl->SharedPromoteIsTriggered;
+   SpinLockRelease(&XLogCtl->info_lck);
+
+   return LocalPromoteIsTriggered;
+}
+
+static void
+SetPromoteIsTriggered(void)
+{
+   SpinLockAcquire(&XLogCtl->info_lck);
+   XLogCtl->SharedPromoteIsTriggered = true;
+   SpinLockRelease(&XLogCtl->info_lck);
+
+   LocalPromoteIsTriggered = true;
+}
+
 /*
  * Check to see whether the user-specified trigger file exists and whether a
  * promote request has arrived.  If either condition holds, return true.
@@ -12285,12 +12339,11 @@ static bool
 CheckForStandbyTrigger(void)
 {
    struct stat stat_buf;
-   static bool triggered = false;
 
-   if (triggered)
+   if (LocalPromoteIsTriggered)
        return true;
 
-   if (IsPromoteTriggered())
+   if (IsPromoteSignaled())
    {
        /*
         * In 9.1 and 9.2 the postmaster unlinked the promote file inside the
@@ -12313,8 +12366,8 @@ CheckForStandbyTrigger(void)
 
        ereport(LOG, (errmsg("received promote request")));
 
-       ResetPromoteTriggered();
-       triggered = true;
+       ResetPromoteSignaled();
+       SetPromoteIsTriggered();
        return true;
    }
 
@@ -12326,7 +12379,7 @@ CheckForStandbyTrigger(void)
        ereport(LOG,
                (errmsg("promote trigger file found: %s", PromoteTriggerFile)));
        unlink(PromoteTriggerFile);
-       triggered = true;
+       SetPromoteIsTriggered();
        fast_promote = true;
        return true;
    }
index 20316539b6f139db3d07cc5611770bf151f7d67a..b84ba572596082af229d74eedd4cf50bc5ef8ccc 100644 (file)
@@ -531,6 +531,13 @@ pg_wal_replay_pause(PG_FUNCTION_ARGS)
                 errmsg("recovery is not in progress"),
                 errhint("Recovery control functions can only be executed during recovery.")));
 
+   if (PromoteIsTriggered())
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("standby promotion is ongoing"),
+                errhint("%s cannot be executed after promotion is triggered.",
+                        "pg_wal_replay_pause()")));
+
    SetRecoveryPause(true);
 
    PG_RETURN_VOID();
@@ -551,6 +558,13 @@ pg_wal_replay_resume(PG_FUNCTION_ARGS)
                 errmsg("recovery is not in progress"),
                 errhint("Recovery control functions can only be executed during recovery.")));
 
+   if (PromoteIsTriggered())
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("standby promotion is ongoing"),
+                errhint("%s cannot be executed after promotion is triggered.",
+                        "pg_wal_replay_resume()")));
+
    SetRecoveryPause(false);
 
    PG_RETURN_VOID();
index c2250d7d4e49d39430fece261bddab62f72d472d..89526767650d2bdfeac2468489687f3e99050266 100644 (file)
@@ -39,7 +39,7 @@
  */
 static volatile sig_atomic_t got_SIGHUP = false;
 static volatile sig_atomic_t shutdown_requested = false;
-static volatile sig_atomic_t promote_triggered = false;
+static volatile sig_atomic_t promote_signaled = false;
 
 /*
  * Flag set when executing a restore command, to tell SIGTERM signal handler
@@ -63,7 +63,7 @@ StartupProcTriggerHandler(SIGNAL_ARGS)
 {
    int         save_errno = errno;
 
-   promote_triggered = true;
+   promote_signaled = true;
    WakeupRecovery();
 
    errno = save_errno;
@@ -197,13 +197,13 @@ PostRestoreCommand(void)
 }
 
 bool
-IsPromoteTriggered(void)
+IsPromoteSignaled(void)
 {
-   return promote_triggered;
+   return promote_signaled;
 }
 
 void
-ResetPromoteTriggered(void)
+ResetPromoteSignaled(void)
 {
-   promote_triggered = false;
+   promote_signaled = false;
 }
index 98b033fc208502c16f6890ff915fabfae09e8d56..331497bcfb9f8252ec5e2fef52e12f5eb8f35bba 100644 (file)
@@ -313,6 +313,7 @@ extern XLogRecPtr GetFlushRecPtr(void);
 extern XLogRecPtr GetLastImportantRecPtr(void);
 extern void RemovePromoteSignalFiles(void);
 
+extern bool PromoteIsTriggered(void);
 extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
 extern void SetWalWriterSleeping(bool sleeping);
index 9f59c1ffa3be7376e4c43bae1b1522c895fb322a..bec313764a192c69081bcd6eaa84b70022028561 100644 (file)
@@ -16,7 +16,7 @@ extern void HandleStartupProcInterrupts(void);
 extern void StartupProcessMain(void) pg_attribute_noreturn();
 extern void PreRestoreCommand(void);
 extern void PostRestoreCommand(void);
-extern bool IsPromoteTriggered(void);
-extern void ResetPromoteTriggered(void);
+extern bool IsPromoteSignaled(void);
+extern void ResetPromoteSignaled(void);
 
 #endif                         /* _STARTUP_H */