Use atomic access for SlruShared->latest_page_number
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Tue, 6 Feb 2024 09:54:10 +0000 (10:54 +0100)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Tue, 6 Feb 2024 09:54:10 +0000 (10:54 +0100)
The new concurrency model proposed for slru.c to improve performance
does not include any single lock that would coordinate processes
doing concurrent reads/writes on SlruShared->latest_page_number.
We can instead use atomic reads and writes for that variable.

Author: Dilip Kumar <dilipbalaut@gmail.com>
Reviewed-by: Andrey M. Borodin <x4mmm@yandex-team.ru>
Discussion: https://postgr.es/m/CAFiTN-vzDvNz=ExGXz6gdyjtzGixKSqs0mKHMmaQ8sOSEFZ33A@mail.gmail.com

src/backend/access/transam/clog.c
src/backend/access/transam/commit_ts.c
src/backend/access/transam/multixact.c
src/backend/access/transam/slru.c
src/include/access/slru.h

index f6e7da7ffc9563185eaf28d4711ff832d3ca30ec..06fc2989bab189bc4227a90e4c29d0852ed15ab8 100644 (file)
@@ -766,14 +766,10 @@ StartupCLOG(void)
        TransactionId xid = XidFromFullTransactionId(TransamVariables->nextXid);
        int64           pageno = TransactionIdToPage(xid);
 
-       LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
        /*
         * Initialize our idea of the latest page number.
         */
-       XactCtl->shared->latest_page_number = pageno;
-
-       LWLockRelease(XactSLRULock);
+       pg_atomic_write_u64(&XactCtl->shared->latest_page_number, pageno);
 }
 
 /*
index 61b82385f359426df200c4db7bb32e8c62b80742..6bfe60343e636018399856555fafd3653c5d6549 100644 (file)
@@ -689,9 +689,7 @@ ActivateCommitTs(void)
        /*
         * Re-Initialize our idea of the latest page number.
         */
-       LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-       CommitTsCtl->shared->latest_page_number = pageno;
-       LWLockRelease(CommitTsSLRULock);
+       pg_atomic_write_u64(&CommitTsCtl->shared->latest_page_number, pageno);
 
        /*
         * If CommitTs is enabled, but it wasn't in the previous server run, we
@@ -1006,7 +1004,8 @@ commit_ts_redo(XLogReaderState *record)
                 * During XLOG replay, latest_page_number isn't set up yet; insert a
                 * suitable value to bypass the sanity test in SimpleLruTruncate.
                 */
-               CommitTsCtl->shared->latest_page_number = trunc->pageno;
+               pg_atomic_write_u64(&CommitTsCtl->shared->latest_page_number,
+                                                       trunc->pageno);
 
                SimpleLruTruncate(CommitTsCtl, trunc->pageno);
        }
index 59523be90132856c1a895cfadb62877cd153d4da..febc429f72415242594e6917e3a3746498a037d2 100644 (file)
@@ -2017,13 +2017,15 @@ StartupMultiXact(void)
         * Initialize offset's idea of the latest page number.
         */
        pageno = MultiXactIdToOffsetPage(multi);
-       MultiXactOffsetCtl->shared->latest_page_number = pageno;
+       pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
+                                               pageno);
 
        /*
         * Initialize member's idea of the latest page number.
         */
        pageno = MXOffsetToMemberPage(offset);
-       MultiXactMemberCtl->shared->latest_page_number = pageno;
+       pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
+                                               pageno);
 }
 
 /*
@@ -2047,14 +2049,15 @@ TrimMultiXact(void)
        oldestMXactDB = MultiXactState->oldestMultiXactDB;
        LWLockRelease(MultiXactGenLock);
 
-       /* Clean up offsets state */
-       LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
        /*
         * (Re-)Initialize our idea of the latest page number for offsets.
         */
        pageno = MultiXactIdToOffsetPage(nextMXact);
-       MultiXactOffsetCtl->shared->latest_page_number = pageno;
+       pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
+                                               pageno);
+
+       /* Clean up offsets state */
+       LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
 
        /*
         * Zero out the remainder of the current offsets page.  See notes in
@@ -2081,14 +2084,16 @@ TrimMultiXact(void)
 
        LWLockRelease(MultiXactOffsetSLRULock);
 
-       /* And the same for members */
-       LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
        /*
+        * And the same for members.
+        *
         * (Re-)Initialize our idea of the latest page number for members.
         */
        pageno = MXOffsetToMemberPage(offset);
-       MultiXactMemberCtl->shared->latest_page_number = pageno;
+       pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
+                                               pageno);
+
+       LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
 
        /*
         * Zero out the remainder of the current members page.  See notes in
@@ -3333,7 +3338,8 @@ multixact_redo(XLogReaderState *record)
                 * SimpleLruTruncate.
                 */
                pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
-               MultiXactOffsetCtl->shared->latest_page_number = pageno;
+               pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
+                                                       pageno);
                PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff);
 
                LWLockRelease(MultiXactTruncationLock);
index 9ac4790f1647c4a94272feb0a17538b5526e82eb..556d1c15867300d06cfbd52431370464dfbd0ccf 100644 (file)
@@ -17,7 +17,8 @@
  * per-buffer LWLocks that synchronize I/O for each buffer.  The control lock
  * must be held to examine or modify any shared state.  A process that is
  * reading in or writing out a page buffer does not hold the control lock,
- * only the per-buffer lock for the buffer it is working on.
+ * only the per-buffer lock for the buffer it is working on.  One exception
+ * is latest_page_number, which is read and written using atomic ops.
  *
  * "Holding the control lock" means exclusive lock in all cases except for
  * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
@@ -239,8 +240,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
                shared->lsn_groups_per_page = nlsns;
 
                shared->cur_lru_count = 0;
-
-               /* shared->latest_page_number will be set later */
+               pg_atomic_write_u64(&shared->latest_page_number, 0);
 
                shared->slru_stats_idx = pgstat_get_slru_index(name);
 
@@ -329,8 +329,15 @@ SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
        /* Set the LSNs for this new page to zero */
        SimpleLruZeroLSNs(ctl, slotno);
 
-       /* Assume this page is now the latest active page */
-       shared->latest_page_number = pageno;
+       /*
+        * Assume this page is now the latest active page.
+        *
+        * Note that because both this routine and SlruSelectLRUPage run with
+        * ControlLock held, it is not possible for this to be zeroing a page that
+        * SlruSelectLRUPage is going to evict simultaneously.  Therefore, there's
+        * no memory barrier here.
+        */
+       pg_atomic_write_u64(&shared->latest_page_number, pageno);
 
        /* update the stats counter of zeroed pages */
        pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
@@ -1113,9 +1120,17 @@ SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
                                shared->page_lru_count[slotno] = cur_count;
                                this_delta = 0;
                        }
+
+                       /*
+                        * If this page is the one most recently zeroed, don't consider it
+                        * an eviction candidate. See comments in SimpleLruZeroPage for an
+                        * explanation about the lack of a memory barrier here.
+                        */
                        this_page_number = shared->page_number[slotno];
-                       if (this_page_number == shared->latest_page_number)
+                       if (this_page_number ==
+                               pg_atomic_read_u64(&shared->latest_page_number))
                                continue;
+
                        if (shared->page_status[slotno] == SLRU_PAGE_VALID)
                        {
                                if (this_delta > best_valid_delta ||
@@ -1254,7 +1269,6 @@ void
 SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
 {
        SlruShared      shared = ctl->shared;
-       int                     slotno;
 
        /* update the stats counter of truncates */
        pgstat_count_slru_truncate(shared->slru_stats_idx);
@@ -1270,10 +1284,13 @@ SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
 restart:
 
        /*
-        * While we are holding the lock, make an important safety check: the
-        * current endpoint page must not be eligible for removal.
+        * An important safety check: the current endpoint page must not be
+        * eligible for removal.  This check is just a backstop against wraparound
+        * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
+        * outdated value; therefore we don't add a memory barrier.
         */
-       if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
+       if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
+                                                 cutoffPage))
        {
                LWLockRelease(shared->ControlLock);
                ereport(LOG,
@@ -1282,7 +1299,7 @@ restart:
                return;
        }
 
-       for (slotno = 0; slotno < shared->num_slots; slotno++)
+       for (int slotno = 0; slotno < shared->num_slots; slotno++)
        {
                if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
                        continue;
index b05f6bc71d912c500c222b4e67c8d27c62febd36..210948865471d421f7f8adbf8ccb3a7302c631f0 100644 (file)
@@ -49,6 +49,9 @@ typedef enum
 
 /*
  * Shared-memory state
+ *
+ * ControlLock is used to protect access to the other fields, except
+ * latest_page_number, which uses atomics; see comment in slru.c.
  */
 typedef struct SlruSharedData
 {
@@ -95,7 +98,7 @@ typedef struct SlruSharedData
         * this is not critical data, since we use it only to avoid swapping out
         * the latest page.
         */
-       int64           latest_page_number;
+       pg_atomic_uint64 latest_page_number;
 
        /* SLRU's index for statistics purposes (might not be unique) */
        int                     slru_stats_idx;