Fix assertion when decoding XLOG_PARAMETER_CHANGE on promoted primary.
author: Masahiko Sawada <msawada@postgresql.org>
Mon, 24 Feb 2025 22:03:04 +0000 (14:03 -0800)
committer: Masahiko Sawada <msawada@postgresql.org>
Mon, 24 Feb 2025 22:03:04 +0000 (14:03 -0800)
When a standby replays an XLOG_PARAMETER_CHANGE record that lowers
wal_level below logical, we invalidate all logical slots in hot
standby mode. However, if this record was replayed while not in hot
standby mode, logical slots could remain valid even after promotion,
potentially causing an assertion failure during WAL record decoding.

To fix this issue, this commit adds a check for hot_standby status
when restoring a logical replication slot on standbys. This check
ensures that logical slots are invalidated when they become
incompatible due to insufficient wal_level during recovery.

Backpatch to v16 where logical decoding on standby was introduced.

Reviewed-by: Amit Kapila <amit.kapila16@gmail.com>
Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Discussion: https://postgr.es/m/CAD21AoABoFwGY_Rh2aeE6tEq3HkJxf0c6UeOXn4VV9v6BAQPSw%40mail.gmail.com
Backpatch-through: 16

src/backend/replication/slot.c
src/test/recovery/t/035_standby_logical_decoding.pl

index 84270c493a59202726a8d5b4c24de75d2eb85f54..d089085b491a60884e795e3fc419df1d1f1a0af4 100644 (file)
@@ -2521,12 +2521,29 @@ RestoreSlotFromDisk(const char *name)
     * NB: Changing the requirements here also requires adapting
     * CheckSlotRequirements() and CheckLogicalDecodingRequirements().
     */
-   if (cp.slotdata.database != InvalidOid && wal_level < WAL_LEVEL_LOGICAL)
-       ereport(FATAL,
-               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                errmsg("logical replication slot \"%s\" exists, but \"wal_level\" < \"logical\"",
-                       NameStr(cp.slotdata.name)),
-                errhint("Change \"wal_level\" to be \"logical\" or higher.")));
+   if (cp.slotdata.database != InvalidOid)
+   {
+       if (wal_level < WAL_LEVEL_LOGICAL)
+           ereport(FATAL,
+                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                    errmsg("logical replication slot \"%s\" exists, but \"wal_level\" < \"logical\"",
+                           NameStr(cp.slotdata.name)),
+                    errhint("Change \"wal_level\" to be \"logical\" or higher.")));
+
+       /*
+        * In standby mode, hot standby must be enabled. This check is
+        * necessary to ensure logical slots are invalidated when they become
+        * incompatible due to insufficient wal_level. Otherwise, if the
+        * primary reduces wal_level < logical while hot standby is disabled,
+        * logical slots would remain valid even after promotion.
+        */
+       if (StandbyMode && !EnableHotStandby)
+           ereport(FATAL,
+                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                    errmsg("logical replication slot \"%s\" exists on the standby, but \"hot_standby\" = \"off\"",
+                           NameStr(cp.slotdata.name)),
+                    errhint("Change \"hot_standby\" to be \"on\".")));
+   }
    else if (wal_level < WAL_LEVEL_REPLICA)
        ereport(FATAL,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
index 505e85d1eb6b9f0fe80ccdab581d9fd290c999da..8903177d883752e58d8db88520906cda6a4abc35 100644 (file)
@@ -342,6 +342,44 @@ $psql_subscriber{run} = IPC::Run::start(
    \$psql_subscriber{subscriber_stderr},
    IPC::Run::timeout($default_timeout));
 
+##################################################
+# Test that the standby requires hot_standby to be
+# enabled for pre-existing logical slots.
+##################################################
+
+# Create the 'restart_test' logical slot on the standby, then stop it and set hot_standby = off.
+$node_standby->create_logical_slot_on_standby($node_primary, 'restart_test');
+$node_standby->stop;
+$node_standby->append_conf('postgresql.conf', qq[hot_standby = off]);
+
+# Use run_log instead of $node_standby->start because this test expects
+# that the server ends with an error during startup.
+run_log(
+   [
+       'pg_ctl',
+       '--pgdata' => $node_standby->data_dir,
+       '--log' => $node_standby->logfile,
+       'start',
+   ]);
+
+# Wait for postgres to terminate: poll until postmaster.pid is gone or the retry budget runs out.
+foreach my $i (0 .. 10 * $PostgreSQL::Test::Utils::timeout_default)
+{
+   last if !-f $node_standby->data_dir . '/postmaster.pid';
+   usleep(100_000);
+}
+
+# Confirm via the server log that startup failed with the expected FATAL error.
+my $logfile = slurp_file($node_standby->logfile());
+ok( $logfile =~
+     qr/FATAL: .* logical replication slot ".*" exists on the standby, but "hot_standby" = "off"/,
+   "the standby ends with an error during startup because hot_standby was disabled"
+);
+$node_standby->adjust_conf('postgresql.conf', 'hot_standby', 'on');
+$node_standby->start;
+$node_standby->safe_psql('postgres',
+   qq[SELECT pg_drop_replication_slot('restart_test')]);
+
 ##################################################
 # Test that logical decoding on the standby
 # behaves correctly.