Use atomic access for SlruShared->latest_page_number
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Tue, 6 Feb 2024 09:54:10 +0000 (10:54 +0100)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Tue, 6 Feb 2024 09:54:10 +0000 (10:54 +0100)
The new concurrency model proposed for slru.c to improve performance
does not include any single lock that would coordinate processes
doing concurrent reads/writes on SlruShared->latest_page_number.
We can instead use atomic reads and writes for that variable.

Author: Dilip Kumar <dilipbalaut@gmail.com>
Reviewed-by: Andrey M. Borodin <x4mmm@yandex-team.ru>
Discussion: https://postgr.es/m/CAFiTN-vzDvNz=ExGXz6gdyjtzGixKSqs0mKHMmaQ8sOSEFZ33A@mail.gmail.com

src/backend/access/transam/clog.c
src/backend/access/transam/commit_ts.c
src/backend/access/transam/multixact.c
src/backend/access/transam/slru.c
src/include/access/slru.h

index f6e7da7ffc9563185eaf28d4711ff832d3ca30ec..06fc2989bab189bc4227a90e4c29d0852ed15ab8 100644 (file)
@@ -766,14 +766,10 @@ StartupCLOG(void)
    TransactionId xid = XidFromFullTransactionId(TransamVariables->nextXid);
    int64       pageno = TransactionIdToPage(xid);
 
-   LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
-
    /*
     * Initialize our idea of the latest page number.
     */
-   XactCtl->shared->latest_page_number = pageno;
-
-   LWLockRelease(XactSLRULock);
+   pg_atomic_write_u64(&XactCtl->shared->latest_page_number, pageno);
 }
 
 /*
index 61b82385f359426df200c4db7bb32e8c62b80742..6bfe60343e636018399856555fafd3653c5d6549 100644 (file)
@@ -689,9 +689,7 @@ ActivateCommitTs(void)
    /*
     * Re-Initialize our idea of the latest page number.
     */
-   LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE);
-   CommitTsCtl->shared->latest_page_number = pageno;
-   LWLockRelease(CommitTsSLRULock);
+   pg_atomic_write_u64(&CommitTsCtl->shared->latest_page_number, pageno);
 
    /*
     * If CommitTs is enabled, but it wasn't in the previous server run, we
@@ -1006,7 +1004,8 @@ commit_ts_redo(XLogReaderState *record)
         * During XLOG replay, latest_page_number isn't set up yet; insert a
         * suitable value to bypass the sanity test in SimpleLruTruncate.
         */
-       CommitTsCtl->shared->latest_page_number = trunc->pageno;
+       pg_atomic_write_u64(&CommitTsCtl->shared->latest_page_number,
+                           trunc->pageno);
 
        SimpleLruTruncate(CommitTsCtl, trunc->pageno);
    }
index 59523be90132856c1a895cfadb62877cd153d4da..febc429f72415242594e6917e3a3746498a037d2 100644 (file)
@@ -2017,13 +2017,15 @@ StartupMultiXact(void)
     * Initialize offset's idea of the latest page number.
     */
    pageno = MultiXactIdToOffsetPage(multi);
-   MultiXactOffsetCtl->shared->latest_page_number = pageno;
+   pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
+                       pageno);
 
    /*
     * Initialize member's idea of the latest page number.
     */
    pageno = MXOffsetToMemberPage(offset);
-   MultiXactMemberCtl->shared->latest_page_number = pageno;
+   pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
+                       pageno);
 }
 
 /*
@@ -2047,14 +2049,15 @@ TrimMultiXact(void)
    oldestMXactDB = MultiXactState->oldestMultiXactDB;
    LWLockRelease(MultiXactGenLock);
 
-   /* Clean up offsets state */
-   LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
-
    /*
     * (Re-)Initialize our idea of the latest page number for offsets.
     */
    pageno = MultiXactIdToOffsetPage(nextMXact);
-   MultiXactOffsetCtl->shared->latest_page_number = pageno;
+   pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
+                       pageno);
+
+   /* Clean up offsets state */
+   LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
 
    /*
     * Zero out the remainder of the current offsets page.  See notes in
@@ -2081,14 +2084,16 @@ TrimMultiXact(void)
 
    LWLockRelease(MultiXactOffsetSLRULock);
 
-   /* And the same for members */
-   LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
-
    /*
+    * And the same for members.
+    *
     * (Re-)Initialize our idea of the latest page number for members.
     */
    pageno = MXOffsetToMemberPage(offset);
-   MultiXactMemberCtl->shared->latest_page_number = pageno;
+   pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
+                       pageno);
+
+   LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
 
    /*
     * Zero out the remainder of the current members page.  See notes in
@@ -3333,7 +3338,8 @@ multixact_redo(XLogReaderState *record)
         * SimpleLruTruncate.
         */
        pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
-       MultiXactOffsetCtl->shared->latest_page_number = pageno;
+       pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
+                           pageno);
        PerformOffsetsTruncation(xlrec.startTruncOff, xlrec.endTruncOff);
 
        LWLockRelease(MultiXactTruncationLock);
index 9ac4790f1647c4a94272feb0a17538b5526e82eb..556d1c15867300d06cfbd52431370464dfbd0ccf 100644 (file)
@@ -17,7 +17,8 @@
  * per-buffer LWLocks that synchronize I/O for each buffer.  The control lock
  * must be held to examine or modify any shared state.  A process that is
  * reading in or writing out a page buffer does not hold the control lock,
- * only the per-buffer lock for the buffer it is working on.
+ * only the per-buffer lock for the buffer it is working on.  One exception
+ * is latest_page_number, which is read and written using atomic ops.
  *
  * "Holding the control lock" means exclusive lock in all cases except for
  * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
@@ -239,8 +240,7 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
        shared->lsn_groups_per_page = nlsns;
 
        shared->cur_lru_count = 0;
-
-       /* shared->latest_page_number will be set later */
+       pg_atomic_write_u64(&shared->latest_page_number, 0);
 
        shared->slru_stats_idx = pgstat_get_slru_index(name);
 
@@ -329,8 +329,15 @@ SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
    /* Set the LSNs for this new page to zero */
    SimpleLruZeroLSNs(ctl, slotno);
 
-   /* Assume this page is now the latest active page */
-   shared->latest_page_number = pageno;
+   /*
+    * Assume this page is now the latest active page.
+    *
+    * Note that because both this routine and SlruSelectLRUPage run with
+    * ControlLock held, it is not possible for this to be zeroing a page that
+    * SlruSelectLRUPage is going to evict simultaneously.  Therefore, there's
+    * no memory barrier here.
+    */
+   pg_atomic_write_u64(&shared->latest_page_number, pageno);
 
    /* update the stats counter of zeroed pages */
    pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
@@ -1113,9 +1120,17 @@ SlruSelectLRUPage(SlruCtl ctl, int64 pageno)
                shared->page_lru_count[slotno] = cur_count;
                this_delta = 0;
            }
+
+           /*
+            * If this page is the one most recently zeroed, don't consider it
+            * an eviction candidate. See comments in SimpleLruZeroPage for an
+            * explanation about the lack of a memory barrier here.
+            */
            this_page_number = shared->page_number[slotno];
-           if (this_page_number == shared->latest_page_number)
+           if (this_page_number ==
+               pg_atomic_read_u64(&shared->latest_page_number))
                continue;
+
            if (shared->page_status[slotno] == SLRU_PAGE_VALID)
            {
                if (this_delta > best_valid_delta ||
@@ -1254,7 +1269,6 @@ void
 SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
 {
    SlruShared  shared = ctl->shared;
-   int         slotno;
 
    /* update the stats counter of truncates */
    pgstat_count_slru_truncate(shared->slru_stats_idx);
@@ -1270,10 +1284,13 @@ SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
 restart:
 
    /*
-    * While we are holding the lock, make an important safety check: the
-    * current endpoint page must not be eligible for removal.
+    * An important safety check: the current endpoint page must not be
+    * eligible for removal.  This check is just a backstop against wraparound
+    * bugs elsewhere in SLRU handling, so we don't care if we read a slightly
+    * outdated value; therefore we don't add a memory barrier.
     */
-   if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
+   if (ctl->PagePrecedes(pg_atomic_read_u64(&shared->latest_page_number),
+                         cutoffPage))
    {
        LWLockRelease(shared->ControlLock);
        ereport(LOG,
@@ -1282,7 +1299,7 @@ restart:
        return;
    }
 
-   for (slotno = 0; slotno < shared->num_slots; slotno++)
+   for (int slotno = 0; slotno < shared->num_slots; slotno++)
    {
        if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
            continue;
index b05f6bc71d912c500c222b4e67c8d27c62febd36..210948865471d421f7f8adbf8ccb3a7302c631f0 100644 (file)
@@ -49,6 +49,9 @@ typedef enum
 
 /*
  * Shared-memory state
+ *
+ * ControlLock is used to protect access to the other fields, except
+ * latest_page_number, which uses atomics; see comment in slru.c.
  */
 typedef struct SlruSharedData
 {
@@ -95,7 +98,7 @@ typedef struct SlruSharedData
     * this is not critical data, since we use it only to avoid swapping out
     * the latest page.
     */
-   int64       latest_page_number;
+   pg_atomic_uint64 latest_page_number;
 
    /* SLRU's index for statistics purposes (might not be unique) */
    int         slru_stats_idx;