Avoid updating inactive_since for invalid replication slots.
authorAmit Kapila <akapila@postgresql.org>
Wed, 5 Feb 2025 03:26:14 +0000 (08:56 +0530)
committerAmit Kapila <akapila@postgresql.org>
Wed, 5 Feb 2025 03:26:14 +0000 (08:56 +0530)
It is possible for the inactive_since value of an invalid replication slot
to be updated multiple times, which is unexpected behavior like during the
release of the slot or at the time of restart. This is harmless because
invalid slots are not allowed to be accessed but it is not prudent to
update invalid slots. We are planning to invalidate slots due to other
reasons like idle time and it will look odd that the slot's inactive_since
displays the recent time in this field after invalidated due to idle time.
So, this patch ensures that the inactive_since field of slots is not
updated for invalid slots.

In the passing, ensure to use the same inactive_since time for all the
slots at restart while restoring them from the disk.

Author: Nisha Moond <nisha.moond412@gmail.com>
Author: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Reviewed-by: Vignesh C <vignesh21@gmail.com>
Reviewed-by: Peter Smith <smithpb2250@gmail.com>
Reviewed-by: Hou Zhijie <houzj.fnst@fujitsu.com>
Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Discussion: https://postgr.es/m/CABdArM7QdifQ_MHmMA=Cc4v8+MeckkwKncm2Nn6tX9wSCQ-+iw@mail.gmail.com

doc/src/sgml/system-views.sgml
src/backend/replication/logical/slotsync.c
src/backend/replication/slot.c
src/include/replication/slot.h

index 8e2b0a7927b420daf79a9e04ddd756fc7d26a023..be81c2b51d27258fb21db7236dfff2b8bd8b4923 100644 (file)
@@ -2566,7 +2566,8 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
       </para>
       <para>
         The time when the slot became inactive. <literal>NULL</literal> if the
-        slot is currently being streamed.
+        slot is currently being streamed. If the slot becomes invalid,
+        this value will never be updated.
         Note that for slots on the standby that are being synced from a
         primary server (whose <structfield>synced</structfield> field is
         <literal>true</literal>), the <structfield>inactive_since</structfield>
index be6f87f00b28ae5507dd432c54e2a2b53af0b0c8..987857b9491c7d058957887b14603e6d5904d07e 100644 (file)
@@ -1541,9 +1541,7 @@ update_synced_slots_inactive_since(void)
            if (now == 0)
                now = GetCurrentTimestamp();
 
-           SpinLockAcquire(&s->mutex);
-           s->inactive_since = now;
-           SpinLockRelease(&s->mutex);
+           ReplicationSlotSetInactiveSince(s, now, true);
        }
    }
 
index c57a13d82089ceeee19bba78c13963b620552af0..fe5acd8b1fc7f6828718b26ba7700d4c5a7f826a 100644 (file)
@@ -644,9 +644,7 @@ retry:
     * Reset the time since the slot has become inactive as the slot is active
     * now.
     */
-   SpinLockAcquire(&s->mutex);
-   s->inactive_since = 0;
-   SpinLockRelease(&s->mutex);
+   ReplicationSlotSetInactiveSince(s, 0, true);
 
    if (am_walsender)
    {
@@ -720,16 +718,12 @@ ReplicationSlotRelease(void)
         */
        SpinLockAcquire(&slot->mutex);
        slot->active_pid = 0;
-       slot->inactive_since = now;
+       ReplicationSlotSetInactiveSince(slot, now, false);
        SpinLockRelease(&slot->mutex);
        ConditionVariableBroadcast(&slot->active_cv);
    }
    else
-   {
-       SpinLockAcquire(&slot->mutex);
-       slot->inactive_since = now;
-       SpinLockRelease(&slot->mutex);
-   }
+       ReplicationSlotSetInactiveSince(slot, now, true);
 
    MyReplicationSlot = NULL;
 
@@ -2218,6 +2212,7 @@ RestoreSlotFromDisk(const char *name)
    bool        restored = false;
    int         readBytes;
    pg_crc32c   checksum;
+   TimestampTz now = 0;
 
    /* no need to lock here, no concurrent access allowed yet */
 
@@ -2408,9 +2403,13 @@ RestoreSlotFromDisk(const char *name)
        /*
         * Set the time since the slot has become inactive after loading the
         * slot from the disk into memory. Whoever acquires the slot i.e.
-        * makes the slot active will reset it.
+        * makes the slot active will reset it. Use the same inactive_since
+        * time for all the slots.
         */
-       slot->inactive_since = GetCurrentTimestamp();
+       if (now == 0)
+           now = GetCurrentTimestamp();
+
+       ReplicationSlotSetInactiveSince(slot, now, false);
 
        restored = true;
        break;
index 47ebdaecb6afeddffcd742e69ee31b3d5cd1d0a8..000c36d30dd2fa39133f1a7f1feef4bb4891e00e 100644 (file)
@@ -228,6 +228,23 @@ typedef struct ReplicationSlotCtlData
    ReplicationSlot replication_slots[1];
 } ReplicationSlotCtlData;
 
+/*
+ * Set slot's inactive_since property unless it was previously invalidated.
+ */
+static inline void
+ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts,
+                               bool acquire_lock)
+{
+   if (acquire_lock)
+       SpinLockAcquire(&s->mutex);
+
+   if (s->data.invalidated == RS_INVAL_NONE)
+       s->inactive_since = ts;
+
+   if (acquire_lock)
+       SpinLockRelease(&s->mutex);
+}
+
 /*
  * Pointers to shared memory
  */