Allow a no-wait lock acquisition to succeed in more cases.
author Robert Haas <rhaas@postgresql.org>
Thu, 14 Mar 2024 12:55:25 +0000 (08:55 -0400)
committer Robert Haas <rhaas@postgresql.org>
Thu, 14 Mar 2024 12:56:06 +0000 (08:56 -0400)
We don't determine the position at which a process waiting for a lock
should insert itself into the wait queue until we reach ProcSleep(),
and we may at that point discover that we must insert ourselves ahead
of everyone who wants a conflicting lock, in which case we obtain the
lock immediately. Up until now, a no-wait lock acquisition would fail
in such cases, erroneously claiming that the lock couldn't be obtained
immediately.  Fix that by trying ProcSleep even in the no-wait case.
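
As an illustration of the user-visible change, consider the scenario
exercised by the new isolation test (session and table names taken from
lock-nowait.spec):

    -- session 1
    BEGIN;
    LOCK TABLE a1 IN ACCESS EXCLUSIVE MODE;

    -- session 2: conflicts with session 1's held lock, so it waits
    BEGIN;
    LOCK TABLE a1 IN EXCLUSIVE MODE;

    -- session 1: the request conflicts with what session 2 is waiting for,
    -- but not with any lock actually granted, and it must be queued ahead
    -- of session 2 anyway; previously this NOWAIT request failed, now it
    -- is granted immediately.
    LOCK TABLE a1 IN SHARE ROW EXCLUSIVE MODE NOWAIT;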

No back-patch for now, because I'm treating this as an improvement to
the existing no-wait feature. It could instead be argued that it's a
bug fix, on the theory that there should never be any case whatsoever
where no-wait fails to obtain a lock that would have been obtained
immediately without no-wait, but I'm reluctant to interpret the
semantics of no-wait that strictly.

Robert Haas and Jingxian Li

Discussion: http://postgr.es/m/CA+TgmobCH-kMXGVpb0BB-iNMdtcNkTvcZ4JBxDJows3kYM+GDg@mail.gmail.com

src/backend/storage/lmgr/lock.c
src/backend/storage/lmgr/proc.c
src/include/storage/proc.h
src/test/isolation/expected/lock-nowait.out [new file with mode: 0644]
src/test/isolation/isolation_schedule
src/test/isolation/specs/lock-nowait.spec [new file with mode: 0644]

index 0d93932d8d336f129d64d58158e53af1c0fd07df..5022a50dd7befddc578892e4f84a259f206b4739 100644 (file)
@@ -360,7 +360,8 @@ static PROCLOCK *SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
 static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
 static void BeginStrongLockAcquire(LOCALLOCK *locallock, uint32 fasthashcode);
 static void FinishStrongLockAcquire(void);
-static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
+static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner,
+                      bool dontWait);
 static void ReleaseLockIfHeld(LOCALLOCK *locallock, bool sessionLock);
 static void LockReassignOwner(LOCALLOCK *locallock, ResourceOwner parent);
 static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
@@ -1024,50 +1025,15 @@ LockAcquireExtended(const LOCKTAG *locktag,
    }
    else
    {
-       /*
-        * We can't acquire the lock immediately.  If caller specified no
-        * blocking, remove useless table entries and return
-        * LOCKACQUIRE_NOT_AVAIL without waiting.
-        */
-       if (dontWait)
-       {
-           AbortStrongLockAcquire();
-           if (proclock->holdMask == 0)
-           {
-               uint32      proclock_hashcode;
-
-               proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode);
-               dlist_delete(&proclock->lockLink);
-               dlist_delete(&proclock->procLink);
-               if (!hash_search_with_hash_value(LockMethodProcLockHash,
-                                                &(proclock->tag),
-                                                proclock_hashcode,
-                                                HASH_REMOVE,
-                                                NULL))
-                   elog(PANIC, "proclock table corrupted");
-           }
-           else
-               PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock);
-           lock->nRequested--;
-           lock->requested[lockmode]--;
-           LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode);
-           Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0));
-           Assert(lock->nGranted <= lock->nRequested);
-           LWLockRelease(partitionLock);
-           if (locallock->nLocks == 0)
-               RemoveLocalLock(locallock);
-           if (locallockp)
-               *locallockp = NULL;
-           return LOCKACQUIRE_NOT_AVAIL;
-       }
-
        /*
         * Set bitmask of locks this process already holds on this object.
         */
        MyProc->heldLocks = proclock->holdMask;
 
        /*
-        * Sleep till someone wakes me up.
+        * Sleep till someone wakes me up. We do this even in the dontWait
+        * case, because while trying to go to sleep, we may discover that we
+        * can acquire the lock immediately after all.
         */
 
        TRACE_POSTGRESQL_LOCK_WAIT_START(locktag->locktag_field1,
@@ -1077,7 +1043,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
                                         locktag->locktag_type,
                                         lockmode);
 
-       WaitOnLock(locallock, owner);
+       WaitOnLock(locallock, owner, dontWait);
 
        TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1,
                                        locktag->locktag_field2,
@@ -1093,17 +1059,63 @@ LockAcquireExtended(const LOCKTAG *locktag,
         */
 
        /*
-        * Check the proclock entry status, in case something in the ipc
-        * communication doesn't work correctly.
+        * Check the proclock entry status. If dontWait = true, this is an
+        * expected case; otherwise, it will only happen if something in the
+        * ipc communication doesn't work correctly.
         */
        if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
        {
            AbortStrongLockAcquire();
-           PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
-           LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
-           /* Should we retry ? */
-           LWLockRelease(partitionLock);
-           elog(ERROR, "LockAcquire failed");
+
+           if (dontWait)
+           {
+               /*
+                * We can't acquire the lock immediately.  If caller specified
+                * no blocking, remove useless table entries and return
+                * LOCKACQUIRE_NOT_AVAIL without waiting.
+                */
+               if (proclock->holdMask == 0)
+               {
+                   uint32      proclock_hashcode;
+
+                   proclock_hashcode = ProcLockHashCode(&proclock->tag,
+                                                        hashcode);
+                   dlist_delete(&proclock->lockLink);
+                   dlist_delete(&proclock->procLink);
+                   if (!hash_search_with_hash_value(LockMethodProcLockHash,
+                                                    &(proclock->tag),
+                                                    proclock_hashcode,
+                                                    HASH_REMOVE,
+                                                    NULL))
+                       elog(PANIC, "proclock table corrupted");
+               }
+               else
+                   PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock);
+               lock->nRequested--;
+               lock->requested[lockmode]--;
+               LOCK_PRINT("LockAcquire: conditional lock failed",
+                          lock, lockmode);
+               Assert((lock->nRequested > 0) &&
+                      (lock->requested[lockmode] >= 0));
+               Assert(lock->nGranted <= lock->nRequested);
+               LWLockRelease(partitionLock);
+               if (locallock->nLocks == 0)
+                   RemoveLocalLock(locallock);
+               if (locallockp)
+                   *locallockp = NULL;
+               return LOCKACQUIRE_NOT_AVAIL;
+           }
+           else
+           {
+               /*
+                * We should have gotten the lock, but somehow that didn't
+                * happen. If we get here, it's a bug.
+                */
+               PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
+               LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
+               LWLockRelease(partitionLock);
+               elog(ERROR, "LockAcquire failed");
+           }
        }
        PROCLOCK_PRINT("LockAcquire: granted", proclock);
        LOCK_PRINT("LockAcquire: granted", lock, lockmode);
@@ -1777,10 +1789,11 @@ MarkLockClear(LOCALLOCK *locallock)
  * Caller must have set MyProc->heldLocks to reflect locks already held
  * on the lockable object by this process.
  *
- * The appropriate partition lock must be held at entry.
+ * The appropriate partition lock must be held at entry, and will still be
+ * held at exit.
  */
 static void
-WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
+WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, bool dontWait)
 {
    LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
    LockMethod  lockMethodTable = LockMethods[lockmethodid];
@@ -1813,8 +1826,14 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
     */
    PG_TRY();
    {
-       if (ProcSleep(locallock, lockMethodTable) != PROC_WAIT_STATUS_OK)
+       /*
+        * If dontWait = true, we handle success and failure in the same way
+        * here. The caller will be able to sort out what has happened.
+        */
+       if (ProcSleep(locallock, lockMethodTable, dontWait) != PROC_WAIT_STATUS_OK
+           && !dontWait)
        {
+
            /*
             * We failed as a result of a deadlock, see CheckDeadLock(). Quit
             * now.
index f3e20038f421ce6337f6a72c50dc5704c6d50ef6..162b1f919dbc693c5e43bc7ea276cb4a3307331e 100644 (file)
@@ -1043,10 +1043,19 @@ AuxiliaryPidGetProc(int pid)
  * Caller must have set MyProc->heldLocks to reflect locks already held
  * on the lockable object by this process (under all XIDs).
  *
+ * It's not actually guaranteed that we need to wait when this function is
+ * called, because it could be that when we try to find a position at which
+ * to insert ourselves into the wait queue, we discover that we must be inserted
+ * ahead of everyone who wants a lock that conflicts with ours. In that case,
+ * we get the lock immediately. Because of this, it's sensible for this function
+ * to have a dontWait argument, despite the name.
+ *
  * The lock table's partition lock must be held at entry, and will be held
  * at exit.
  *
- * Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR if not (deadlock).
+ * Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR
+ * if not (if dontWait = true, this means we would have had to wait; if
+ * dontWait = false, this means a deadlock was detected).
  *
  * ASSUME: that no one will fiddle with the queue until after
  *     we release the partition lock.
@@ -1054,7 +1063,7 @@ AuxiliaryPidGetProc(int pid)
  * NOTES: The process queue is now a priority queue for locking.
  */
 ProcWaitStatus
-ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
+ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait)
 {
    LOCKMODE    lockmode = locallock->tag.mode;
    LOCK       *lock = locallock->lock;
@@ -1167,6 +1176,13 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
        }
    }
 
+   /*
+    * At this point we know that we'd really need to sleep. If we've been
+    * commanded not to do that, bail out.
+    */
+   if (dontWait)
+       return PROC_WAIT_STATUS_ERROR;
+
    /*
     * Insert self into queue, at the position determined above.
     */
index 1095aefddfea19cb3bf33352831b61fad3e415ec..18891a86fb956ebfd8c7eda7583e27e1e42daa8f 100644 (file)
@@ -471,7 +471,9 @@ extern int  GetStartupBufferPinWaitBufId(void);
 extern bool HaveNFreeProcs(int n, int *nfree);
 extern void ProcReleaseLocks(bool isCommit);
 
-extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
+extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock,
+                               LockMethod lockMethodTable,
+                               bool dontWait);
 extern void ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus);
 extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
 extern void CheckDeadLockAlert(void);
diff --git a/src/test/isolation/expected/lock-nowait.out b/src/test/isolation/expected/lock-nowait.out
new file mode 100644 (file)
index 0000000..2dc5aad
--- /dev/null
@@ -0,0 +1,9 @@
+Parsed test spec with 2 sessions
+
+starting permutation: s1a s2a s1b s1c s2c
+step s1a: LOCK TABLE a1 IN ACCESS EXCLUSIVE MODE;
+step s2a: LOCK TABLE a1 IN EXCLUSIVE MODE; <waiting ...>
+step s1b: LOCK TABLE a1 IN SHARE ROW EXCLUSIVE MODE NOWAIT;
+step s1c: COMMIT;
+step s2a: <... completed>
+step s2c: COMMIT;
index b2be88ead1d2597c4b54805930ba8e9b310927b7..188fc04f85e062b51839e9288d1060cce0016f9c 100644 (file)
@@ -111,3 +111,4 @@ test: serializable-parallel
 test: serializable-parallel-2
 test: serializable-parallel-3
 test: matview-write-skew
+test: lock-nowait
diff --git a/src/test/isolation/specs/lock-nowait.spec b/src/test/isolation/specs/lock-nowait.spec
new file mode 100644 (file)
index 0000000..bb46d12
--- /dev/null
@@ -0,0 +1,28 @@
+# When requesting a lock with NOWAIT, if the requested lock must be
+# inserted ahead of some existing waiter, check whether it conflicts
+# with locks that are already held or with requests queued ahead of
+# that waiter. If it does not, grant the requested lock immediately.
+# Test this scenario.
+
+setup
+{
+  CREATE TABLE a1 ();
+}
+
+teardown
+{
+  DROP TABLE a1;
+}
+
+session s1
+setup      { BEGIN; }
+step s1a   { LOCK TABLE a1 IN ACCESS EXCLUSIVE MODE; }
+step s1b   { LOCK TABLE a1 IN SHARE ROW EXCLUSIVE MODE NOWAIT; }
+step s1c   { COMMIT; }
+
+session s2
+setup      { BEGIN; }
+step s2a   { LOCK TABLE a1 IN EXCLUSIVE MODE; }
+step s2c   { COMMIT; }
+
+permutation s1a s2a s1b s1c s2c