summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
author: Amit Kapila, 2024-02-22 09:55:15 +0000
committer: Amit Kapila, 2024-02-22 09:55:15 +0000
commit: 93db6cbda037f1be9544932bd9a785dabf3ff712 (patch)
tree: 804da2c1a7ee4634dc92789c98fa611dada365de /src/test
parent: 3d47b75546d1ef70145f58e162a96f7e0c649389 (diff)
Add a new slot sync worker to synchronize logical slots.
By enabling slot synchronization, all the failover logical replication slots on the primary (assuming configurations are appropriate) are automatically created on the physical standbys and are synced periodically. The slot sync worker on the standby server pings the primary server at regular intervals to get the necessary failover logical slots information and create/update the slots locally. The slots that no longer require synchronization are automatically dropped by the worker. The nap time of the worker is tuned according to the activity on the primary. The slot sync worker waits for some time before the next synchronization, with the duration varying based on whether any slots were updated during the last cycle. A new parameter sync_replication_slots enables or disables this new process. On promotion, the slot sync worker is shut down by the startup process to drop any temporary slots acquired by the slot sync worker and to prevent the worker from trying to fetch the failover slots. The functionality to allow logical walsenders to wait for the physical standbys will be added in a subsequent commit.

Author: Shveta Malik, Hou Zhijie based on design inputs by Masahiko Sawada and Amit Kapila
Reviewed-by: Masahiko Sawada, Bertrand Drouvot, Peter Smith, Dilip Kumar, Ajin Cherian, Nisha Moond, Kuroda Hayato, Amit Kapila
Discussion: https://postgr.es/m/514f6f2f-6833-4539-39f1-96cd1e011f23@enterprisedb.com
Diffstat (limited to 'src/test')
-rw-r--r-- src/test/recovery/t/040_standby_failover_slots_sync.pl 120
1 files changed, 120 insertions, 0 deletions
diff --git a/src/test/recovery/t/040_standby_failover_slots_sync.pl b/src/test/recovery/t/040_standby_failover_slots_sync.pl
index 0f2f819f53b..e24009610ad 100644
--- a/src/test/recovery/t/040_standby_failover_slots_sync.pl
+++ b/src/test/recovery/t/040_standby_failover_slots_sync.pl
@@ -322,6 +322,10 @@ ok( $stderr =~
/ERROR: slot synchronization requires dbname to be specified in primary_conninfo/,
"cannot sync slots if dbname is not specified in primary_conninfo");
+# Add the dbname back to primary_conninfo; slot synchronization requires it, and the tests below depend on it
+$standby1->append_conf('postgresql.conf', "primary_conninfo = '$connstr_1 dbname=postgres'");
+$standby1->reload;
+
##################################################
# Test that we cannot synchronize slots to a cascading standby server.
##################################################
@@ -355,4 +359,120 @@ ok( $stderr =~
/ERROR: cannot synchronize replication slots from a standby server/,
"cannot sync slots to a cascading standby server");
+$cascading_standby->stop;
+
+##################################################
+# Test to confirm that the slot sync worker exits on invalid GUC(s) and
+# gets started again on valid GUC(s).
+##################################################
+
+$log_offset = -s $standby1->logfile; # current log size, so that only new messages are matched
+
+# Enable slot sync worker.
+$standby1->append_conf('postgresql.conf', qq(sync_replication_slots = on));
+$standby1->reload;
+
+# Confirm that the slot sync worker is able to start.
+$standby1->wait_for_log(qr/LOG: slot sync worker started/,
+ $log_offset);
+
+$log_offset = -s $standby1->logfile;
+
+# Disable another GUC required for slot sync.
+$standby1->append_conf( 'postgresql.conf', qq(hot_standby_feedback = off));
+$standby1->reload;
+
+# Confirm that the slot sync worker acknowledges the GUC change and logs the
+# message about the wrong configuration.
+$standby1->wait_for_log(qr/LOG: slot sync worker will restart because of a parameter change/,
+ $log_offset);
+$standby1->wait_for_log(qr/LOG: slot synchronization requires hot_standby_feedback to be enabled/,
+ $log_offset);
+
+$log_offset = -s $standby1->logfile;
+
+# Re-enable the required GUC.
+$standby1->append_conf('postgresql.conf', "hot_standby_feedback = on");
+$standby1->reload;
+
+# Confirm that the slot sync worker is able to start again now.
+$standby1->wait_for_log(qr/LOG: slot sync worker started/,
+ $log_offset);
+
+##################################################
+# Test to confirm that the restart_lsn and confirmed_flush_lsn of the logical
+# slot on the primary are synced to the standby via the slot sync worker.
+##################################################
+
+# Create a table and insert data on the primary
+$primary->safe_psql(
+ 'postgres', qq[
+ CREATE TABLE tab_int (a int PRIMARY KEY);
+ INSERT INTO tab_int SELECT generate_series(1, 10);
+]);
+
+# Subscribe to the new table data and wait for it to arrive
+$subscriber1->safe_psql(
+ 'postgres', qq[
+ CREATE TABLE tab_int (a int PRIMARY KEY);
+ ALTER SUBSCRIPTION regress_mysub1 ENABLE;
+ ALTER SUBSCRIPTION regress_mysub1 REFRESH PUBLICATION;
+]);
+
+$subscriber1->wait_for_subscription_sync;
+
+# Disable the subscription so that restart_lsn and confirmed_flush_lsn of
+# lsub1_slot do not advance any further.
+$subscriber1->safe_psql('postgres', "ALTER SUBSCRIPTION regress_mysub1 DISABLE");
+
+# Wait for the replication slot to become inactive on the publisher
+$primary->poll_query_until(
+ 'postgres',
+ "SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE slot_name = 'lsub1_slot' AND active='f'",
+ 1);
+
+# Get the restart_lsn for the logical slot lsub1_slot on the primary
+my $primary_restart_lsn = $primary->safe_psql('postgres',
+ "SELECT restart_lsn from pg_replication_slots WHERE slot_name = 'lsub1_slot';");
+
+# Get the confirmed_flush_lsn for the logical slot lsub1_slot on the primary
+my $primary_flush_lsn = $primary->safe_psql('postgres',
+ "SELECT confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'lsub1_slot';");
+
+# Confirm that the restart_lsn and confirmed_flush_lsn of lsub1_slot are synced
+# to the standby, and that the slot there is marked synced and not temporary
+ok( $standby1->poll_query_until(
+ 'postgres',
+ "SELECT '$primary_restart_lsn' = restart_lsn AND '$primary_flush_lsn' = confirmed_flush_lsn from pg_replication_slots WHERE slot_name = 'lsub1_slot' AND synced AND NOT temporary;"),
+ 'restart_lsn and confirmed_flush_lsn of slot lsub1_slot synced to standby');
+
+##################################################
+# Promote standby1 to primary. Confirm that:
+# a) the slot 'lsub1_slot' is retained on the new primary
+# b) logical replication for regress_mysub1 is resumed successfully after failover
+##################################################
+$standby1->promote;
+
+# Point the subscription at the new primary and re-enable it (it was disabled earlier)
+my $standby1_conninfo = $standby1->connstr . ' dbname=postgres';
+$subscriber1->safe_psql('postgres',
+ "ALTER SUBSCRIPTION regress_mysub1 CONNECTION '$standby1_conninfo';
+ ALTER SUBSCRIPTION regress_mysub1 ENABLE; ");
+
+# Confirm that the synced slot 'lsub1_slot' is retained on the new primary
+is($standby1->safe_psql('postgres',
+ q{SELECT slot_name FROM pg_replication_slots WHERE slot_name = 'lsub1_slot' AND synced AND NOT temporary;}),
+ 'lsub1_slot',
+ 'synced slot retained on the new primary');
+
+# Insert data on the new primary
+$standby1->safe_psql('postgres',
+ "INSERT INTO tab_int SELECT generate_series(11, 20);");
+$standby1->wait_for_catchup('regress_mysub1');
+
+# Confirm that the data in tab_int has been replicated to the subscriber
+is( $subscriber1->safe_psql('postgres', q{SELECT count(*) FROM tab_int;}),
+ "20",
+ 'data replicated from the new primary');
+
done_testing();