Don't fail for > 1 walsenders in 019_replslot_limit, add debug messages.

author Andres Freund <andres@anarazel.de>

Mon, 28 Mar 2022 05:29:19 +0000 (22:29 -0700)

committer Andres Freund <andres@anarazel.de>

Mon, 28 Mar 2022 05:35:42 +0000 (22:35 -0700)
author Andres Freund <andres@anarazel.de>
Mon, 28 Mar 2022 05:29:19 +0000 (22:29 -0700)
committer Andres Freund <andres@anarazel.de>
Mon, 28 Mar 2022 05:35:42 +0000 (22:35 -0700)
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c

index caa6b297560ef530e140409b0b35fb9359e2d2f4..ed4c8b3ad5586839fa4c22b8885dd4d43d3b45bd 100644 (file)
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -702,8 +702,13 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
     slot->active_pid = 0;
     slot->in_use = false;
     LWLockRelease(ReplicationSlotControlLock);
+
+   elog(DEBUG3, "replication slot drop: %s: marked as not in use", NameStr(slot->data.name));
+
     ConditionVariableBroadcast(&slot->active_cv);
  
+   elog(DEBUG3, "replication slot drop: %s: notified others", NameStr(slot->data.name));
+
     /*
      * Slot is dead and doesn't prevent resource removal anymore, recompute
      * limits.
@@ -711,6 +716,8 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
     ReplicationSlotsComputeRequiredXmin(false);
     ReplicationSlotsComputeRequiredLSN();
  
+   elog(DEBUG3, "replication slot drop: %s: computed required", NameStr(slot->data.name));
+
     /*
      * If removing the directory fails, the worst thing that will happen is
      * that the user won't be able to create a new slot with the same name
@@ -720,6 +727,8 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
         ereport(WARNING,
                 (errmsg("could not remove directory \"%s\"", tmppath)));
  
+   elog(DEBUG3, "replication slot drop: %s: removed directory", NameStr(slot->data.name));
+
     /*
      * Send a message to drop the replication slot to the stats collector.
      * Since there is no guarantee of the order of message transfer on a UDP
diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl

index 77bb401bc5fd2a329b2842d45fbd612990beaff0..5654f3b545537e5d96f1664b72f1061213c0f50b 100644 (file)
--- a/src/test/recovery/t/019_replslot_limit.pl
+++ b/src/test/recovery/t/019_replslot_limit.pl
@@ -339,8 +339,8 @@ my $senderpid;
  # We've seen occasional cases where multiple walsender pids are active. It
  # could be that we're just observing process shutdown being slow. To collect
  # more information, retry a couple times, print a bit of debugging information
-# each iteration. For now report a test failure even if later iterations
-# succeed.
+# each iteration. Don't fail the test if retries find just one pid, the
+# buildfarm failures are too noisy.
  my $i = 0;
  while (1)
  {
@@ -349,7 +349,9 @@ while (1)
     $senderpid = $node_primary3->safe_psql('postgres',
         "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walsender'");
  
-   last if like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid");
+   last if $senderpid =~ qr/^[0-9]+$/;
+
+   diag "multiple walsenders active in iteration $i";
  
     # show information about all active connections
     $node_primary3->psql('postgres',
@@ -370,6 +372,8 @@ while (1)
     usleep(100_000);
  }
  
+like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid");
+
  my $receiverpid = $node_standby3->safe_psql('postgres',
     "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walreceiver'");
  like($receiverpid, qr/^[0-9]+$/, "have walreceiver pid $receiverpid");
author	Andres Freund <andres@anarazel.de>
	Mon, 28 Mar 2022 05:29:19 +0000 (22:29 -0700)
committer	Andres Freund <andres@anarazel.de>
	Mon, 28 Mar 2022 05:35:42 +0000 (22:35 -0700)
src/backend/replication/slot.c		patch \| blob \| blame \| history
src/test/recovery/t/019_replslot_limit.pl		patch \| blob \| blame \| history