Rework wait for AccessExclusiveLocks on Hot Standby
authorSimon Riggs <simon@2ndQuadrant.com>
Thu, 10 Mar 2016 19:26:24 +0000 (19:26 +0000)
committerSimon Riggs <simon@2ndQuadrant.com>
Thu, 10 Mar 2016 19:26:24 +0000 (19:26 +0000)
Earlier version committed in 9.0 caused spurious waits in some cases.
New infrastructure for lock waits in 9.3 used to correct and improve this.

Jeff Janes based upon a proposal by Simon Riggs, who also reviewed
Additional review comments from Amit Kapila

src/backend/postmaster/startup.c
src/backend/storage/ipc/standby.c
src/backend/storage/lmgr/proc.c
src/include/storage/standby.h
src/include/utils/timeout.h

index 639fefb3cbd3d93c0a36fd904465625d5adb3479..a7ae7e3bda00fa2ee9589957e039a37d782fcb47 100644 (file)
@@ -203,6 +203,7 @@ StartupProcessMain(void)
     */
    RegisterTimeout(STANDBY_DEADLOCK_TIMEOUT, StandbyDeadLockHandler);
    RegisterTimeout(STANDBY_TIMEOUT, StandbyTimeoutHandler);
+   RegisterTimeout(STANDBY_LOCK_TIMEOUT, StandbyLockTimeoutHandler);
 
    /*
     * Unblock signals (they were blocked when the postmaster forked us)
index 206fa2d69e6ada8e14ffe08b239ae7568b3d6771..6a9bf842d3923affc66e251dcd277afcab1ec7e9 100644 (file)
@@ -41,7 +41,6 @@ static List *RecoveryLockList;
 
 static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
                                       ProcSignalReason reason);
-static void ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid);
 static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
 static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
 static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
@@ -339,39 +338,65 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
    }
 }
 
-static void
-ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid)
+/*
+ * ResolveRecoveryConflictWithLock is called from ProcSleep()
+ * to resolve conflicts with other backends holding relation locks.
+ *
+ * The WaitLatch sleep normally done in ProcSleep()
+ * (when not InHotStandby) is performed here, for code clarity.
+ *
+ * We either resolve conflicts immediately or set a timeout to wake us at
+ * the limit of our patience.
+ *
+ * Resolve conflicts by cancelling to all backends holding a conflicting
+ * lock.  As we are already queued to be granted the lock, no new lock
+ * requests conflicting with ours will be granted in the meantime.
+ *
+ * Deadlocks involving the Startup process and an ordinary backend process
+ * will be detected by the deadlock detector within the ordinary backend.
+ */
+void
+ResolveRecoveryConflictWithLock(LOCKTAG locktag)
 {
-   VirtualTransactionId *backends;
-   bool        lock_acquired = false;
-   int         num_attempts = 0;
-   LOCKTAG     locktag;
+   TimestampTz ltime;
 
-   SET_LOCKTAG_RELATION(locktag, dbOid, relOid);
+   Assert(InHotStandby);
 
-   /*
-    * If blowing away everybody with conflicting locks doesn't work, after
-    * the first two attempts then we just start blowing everybody away until
-    * it does work. We do this because its likely that we either have too
-    * many locks and we just can't get one at all, or that there are many
-    * people crowding for the same table. Recovery must win; the end
-    * justifies the means.
-    */
-   while (!lock_acquired)
-   {
-       if (++num_attempts < 3)
-           backends = GetLockConflicts(&locktag, AccessExclusiveLock);
-       else
-           backends = GetConflictingVirtualXIDs(InvalidTransactionId,
-                                                InvalidOid);
+   ltime = GetStandbyLimitTime();
 
+   if (GetCurrentTimestamp() >= ltime)
+   {
+       /*
+        * We're already behind, so clear a path as quickly as possible.
+        */
+       VirtualTransactionId *backends;
+       backends = GetLockConflicts(&locktag, AccessExclusiveLock);
        ResolveRecoveryConflictWithVirtualXIDs(backends,
                                             PROCSIG_RECOVERY_CONFLICT_LOCK);
+   }
+   else
+   {
+       /*
+        * Wait (or wait again) until ltime
+        */
+       EnableTimeoutParams timeouts[1];
 
-       if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false)
-           != LOCKACQUIRE_NOT_AVAIL)
-           lock_acquired = true;
+       timeouts[0].id = STANDBY_LOCK_TIMEOUT;
+       timeouts[0].type = TMPARAM_AT;
+       timeouts[0].fin_time = ltime;
+       enable_timeouts(timeouts, 1);
    }
+
+   /* Wait to be signaled by the release of the Relation Lock */
+   ProcWaitForSignal();
+
+   /*
+    * Clear any timeout requests established above.  We assume here that the
+    * Startup process doesn't have any other outstanding timeouts than those
+    * used by this function. If that stops being true, we could cancel the
+    * timeouts individually, but that'd be slower.
+    */
+   disable_all_timeouts(false);
 }
 
 /*
@@ -534,6 +559,14 @@ StandbyTimeoutHandler(void)
    SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
 }
 
+/*
+ * StandbyLockTimeoutHandler() will be called if STANDBY_LOCK_TIMEOUT is exceeded.
+ * This doesn't need to do anything, simply waking up is enough.
+ */
+void
+StandbyLockTimeoutHandler(void)
+{
+}
 
 /*
  * -----------------------------------------------------
@@ -547,7 +580,7 @@ StandbyTimeoutHandler(void)
  * process is the proxy by which the original locks are implemented.
  *
  * We only keep track of AccessExclusiveLocks, which are only ever held by
- * one transaction on one relation, and don't worry about lock queuing.
+ * one transaction on one relation.
  *
  * We keep a single dynamically expandible list of locks in local memory,
  * RelationLockList, so we can keep track of the various entries made by
@@ -589,14 +622,9 @@ StandbyAcquireAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid)
    newlock->relOid = relOid;
    RecoveryLockList = lappend(RecoveryLockList, newlock);
 
-   /*
-    * Attempt to acquire the lock as requested, if not resolve conflict
-    */
    SET_LOCKTAG_RELATION(locktag, newlock->dbOid, newlock->relOid);
 
-   if (LockAcquireExtended(&locktag, AccessExclusiveLock, true, true, false)
-       == LOCKACQUIRE_NOT_AVAIL)
-       ResolveRecoveryConflictWithLock(newlock->dbOid, newlock->relOid);
+   LockAcquireExtended(&locktag, AccessExclusiveLock, true, false, false);
 }
 
 static void
index 46838d852e5882a0aef58a7611a3750e4c4d8c3c..74ef4197986347c4975063390669d636e0a4975e 100644 (file)
@@ -42,6 +42,7 @@
 #include "postmaster/autovacuum.h"
 #include "replication/slot.h"
 #include "replication/syncrep.h"
+#include "storage/standby.h"
 #include "storage/ipc.h"
 #include "storage/lmgr.h"
 #include "storage/pmsignal.h"
@@ -1169,21 +1170,27 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
     *
     * If LockTimeout is set, also enable the timeout for that.  We can save a
     * few cycles by enabling both timeout sources in one call.
+    *
+    * If InHotStandby we set lock waits slightly later for clarity with other
+    * code.
     */
-   if (LockTimeout > 0)
+   if (!InHotStandby)
    {
-       EnableTimeoutParams timeouts[2];
-
-       timeouts[0].id = DEADLOCK_TIMEOUT;
-       timeouts[0].type = TMPARAM_AFTER;
-       timeouts[0].delay_ms = DeadlockTimeout;
-       timeouts[1].id = LOCK_TIMEOUT;
-       timeouts[1].type = TMPARAM_AFTER;
-       timeouts[1].delay_ms = LockTimeout;
-       enable_timeouts(timeouts, 2);
+       if (LockTimeout > 0)
+       {
+           EnableTimeoutParams timeouts[2];
+
+           timeouts[0].id = DEADLOCK_TIMEOUT;
+           timeouts[0].type = TMPARAM_AFTER;
+           timeouts[0].delay_ms = DeadlockTimeout;
+           timeouts[1].id = LOCK_TIMEOUT;
+           timeouts[1].type = TMPARAM_AFTER;
+           timeouts[1].delay_ms = LockTimeout;
+           enable_timeouts(timeouts, 2);
+       }
+       else
+           enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
    }
-   else
-       enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
 
    /*
     * If somebody wakes us between LWLockRelease and WaitLatch, the latch
@@ -1201,15 +1208,23 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
     */
    do
    {
-       WaitLatch(MyLatch, WL_LATCH_SET, 0);
-       ResetLatch(MyLatch);
-       /* check for deadlocks first, as that's probably log-worthy */
-       if (got_deadlock_timeout)
+       if (InHotStandby)
+       {
+           /* Set a timer and wait for that or for the Lock to be granted */
+           ResolveRecoveryConflictWithLock(locallock->tag.lock);
+       }
+       else
        {
-           CheckDeadLock();
-           got_deadlock_timeout = false;
+           WaitLatch(MyLatch, WL_LATCH_SET, 0);
+           ResetLatch(MyLatch);
+           /* check for deadlocks first, as that's probably log-worthy */
+           if (got_deadlock_timeout)
+           {
+               CheckDeadLock();
+               got_deadlock_timeout = false;
+           }
+           CHECK_FOR_INTERRUPTS();
        }
-       CHECK_FOR_INTERRUPTS();
 
        /*
         * waitStatus could change from STATUS_WAITING to something else
@@ -1447,18 +1462,21 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
     * already caused QueryCancelPending to become set, we want the cancel to
     * be reported as a lock timeout, not a user cancel.
     */
-   if (LockTimeout > 0)
+   if (!InHotStandby)
    {
-       DisableTimeoutParams timeouts[2];
+       if (LockTimeout > 0)
+       {
+           DisableTimeoutParams timeouts[2];
 
-       timeouts[0].id = DEADLOCK_TIMEOUT;
-       timeouts[0].keep_indicator = false;
-       timeouts[1].id = LOCK_TIMEOUT;
-       timeouts[1].keep_indicator = true;
-       disable_timeouts(timeouts, 2);
+           timeouts[0].id = DEADLOCK_TIMEOUT;
+           timeouts[0].keep_indicator = false;
+           timeouts[1].id = LOCK_TIMEOUT;
+           timeouts[1].keep_indicator = true;
+           disable_timeouts(timeouts, 2);
+       }
+       else
+           disable_timeout(DEADLOCK_TIMEOUT, false);
    }
-   else
-       disable_timeout(DEADLOCK_TIMEOUT, false);
 
    /*
     * Re-acquire the lock table's partition lock.  We have to do this to hold
index 7e6b2254cd0c3a1ee69c268eb803f6dc547527db..aafc9b8a4828f6841350438dc217a91e1644fa75 100644 (file)
@@ -15,6 +15,7 @@
 #define STANDBY_H
 
 #include "storage/standbydefs.h"
+#include "storage/lock.h"
 #include "storage/procsignal.h"
 #include "storage/relfilenode.h"
 
@@ -31,10 +32,12 @@ extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
 extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
 extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
 
+extern void ResolveRecoveryConflictWithLock(LOCKTAG locktag);
 extern void ResolveRecoveryConflictWithBufferPin(void);
 extern void CheckRecoveryConflictDeadlock(void);
 extern void StandbyDeadLockHandler(void);
 extern void StandbyTimeoutHandler(void);
+extern void StandbyLockTimeoutHandler(void);
 
 /*
  * Standby Rmgr (RM_STANDBY_ID)
index 723b475d2ab086cd68ce599efca41dc53bc64fcb..14e9720c8856854b1d8825eb88308ef834d32b72 100644 (file)
@@ -29,6 +29,7 @@ typedef enum TimeoutId
    STATEMENT_TIMEOUT,
    STANDBY_DEADLOCK_TIMEOUT,
    STANDBY_TIMEOUT,
+   STANDBY_LOCK_TIMEOUT,
    /* First user-definable timeout reason */
    USER_TIMEOUT,
    /* Maximum number of timeout reasons */