diff options
author | Amit Kapila | 2021-04-27 03:39:11 +0000 |
---|---|---|
committer | Amit Kapila | 2021-04-27 03:39:11 +0000 |
commit | 3fa17d37716f978f80dfcdab4e7c73f3a24e7a48 (patch) | |
tree | 43a865e413ebd2852e418535b8fbbdb4a83d6d78 /contrib/test_decoding | |
parent | e7eea52b2d61917fbbdac7f3f895e4ef636e935b (diff) |
Use HTAB for replication slot statistics.
Previously, we used an array of size max_replication_slots to store
stats for replication slots. But that had two problems in the cases
where a message for dropping a slot gets lost: 1) the stats for the new
slot are not recorded if the array is full and 2) we write beyond the end
of the array if the user reduces max_replication_slots.
This commit uses HTAB for replication slot statistics, resolving both
problems. Now, pgstat_vacuum_stat() searches for all the dead replication
slots in the stats hash table and tells the collector to remove them. To avoid
showing the stats for the already-dropped slots, pg_stat_replication_slots
view searches slot stats by the slot name taken from pg_replication_slots.
Also, we send a message for creating a slot at slot creation, initializing
the stats. This reduces the possibility that the stats are accumulated
into the old slot stats when a message for dropping a slot gets lost.
Reported-by: Andres Freund
Author: Sawada Masahiko, test case by Vignesh C
Reviewed-by: Amit Kapila, Vignesh C, Dilip Kumar
Discussion: https://postgr.es/m/20210319185247.ldebgpdaxsowiflw@alap3.anarazel.de
Diffstat (limited to 'contrib/test_decoding')
-rw-r--r-- | contrib/test_decoding/t/001_repl_stats.pl | 69 |
1 files changed, 54 insertions, 15 deletions
diff --git a/contrib/test_decoding/t/001_repl_stats.pl b/contrib/test_decoding/t/001_repl_stats.pl index 11b6cd9b9c..3ab0e80722 100644 --- a/contrib/test_decoding/t/001_repl_stats.pl +++ b/contrib/test_decoding/t/001_repl_stats.pl @@ -2,9 +2,10 @@ # drop replication slot and restart. use strict; use warnings; +use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 1; +use Test::More tests => 2; # Test set-up my $node = get_new_node('test'); @@ -12,9 +13,22 @@ $node->init(allows_streaming => 'logical'); $node->append_conf('postgresql.conf', 'synchronous_commit = on'); $node->start; +# Check that replication slot stats are expected. +sub test_slot_stats +{ + my ($node, $expected, $msg) = @_; + + my $result = $node->safe_psql( + 'postgres', qq[ + SELECT slot_name, total_txns > 0 AS total_txn, + total_bytes > 0 AS total_bytes + FROM pg_stat_replication_slots + ORDER BY slot_name]); + is($result, $expected, $msg); +} + # Create table. -$node->safe_psql('postgres', - "CREATE TABLE test_repl_stat(col1 int)"); +$node->safe_psql('postgres', "CREATE TABLE test_repl_stat(col1 int)"); # Create replication slots. $node->safe_psql( @@ -26,7 +40,8 @@ $node->safe_psql( ]); # Insert some data. -$node->safe_psql('postgres', "INSERT INTO test_repl_stat values(generate_series(1, 5));"); +$node->safe_psql('postgres', + "INSERT INTO test_repl_stat values(generate_series(1, 5));"); $node->safe_psql( 'postgres', qq[ @@ -50,27 +65,51 @@ $node->poll_query_until( # Test to drop one of the replication slot and verify replication statistics data is # fine after restart. -$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot4')"); +$node->safe_psql('postgres', + "SELECT pg_drop_replication_slot('regression_slot4')"); $node->stop; $node->start; # Verify statistics data present in pg_stat_replication_slots are sane after # restart. 
-my $result = $node->safe_psql('postgres', - "SELECT slot_name, total_txns > 0 AS total_txn, - total_bytes > 0 AS total_bytes FROM pg_stat_replication_slots - ORDER BY slot_name" -); -is($result, qq(regression_slot1|t|t +test_slot_stats( + $node, + qq(regression_slot1|t|t regression_slot2|t|t -regression_slot3|t|t), 'check replication statistics are updated'); +regression_slot3|t|t), + 'check replication statistics are updated'); + +# Test to remove one of the replication slots and adjust +# max_replication_slots accordingly to the number of slots. This leads +# to a mismatch between the number of slots present in the stats file and the +# number of stats present in the shared memory, simulating the scenario for +# drop slot message lost by the statistics collector process. We verify +# replication statistics data is fine after restart. + +$node->stop; +my $datadir = $node->data_dir; +my $slot3_replslotdir = "$datadir/pg_replslot/regression_slot3"; + +rmtree($slot3_replslotdir); + +$node->append_conf('postgresql.conf', 'max_replication_slots = 2'); +$node->start; + +# Verify statistics data present in pg_stat_replication_slots are sane after +# restart. +test_slot_stats( + $node, + qq(regression_slot1|t|t +regression_slot2|t|t), + 'check replication statistics after removing the slot file'); # cleanup $node->safe_psql('postgres', "DROP TABLE test_repl_stat"); -$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot1')"); -$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot2')"); -$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot3')"); +$node->safe_psql('postgres', + "SELECT pg_drop_replication_slot('regression_slot1')"); +$node->safe_psql('postgres', + "SELECT pg_drop_replication_slot('regression_slot2')"); # shutdown $node->stop; |