slotsync.c
1/*-------------------------------------------------------------------------
2 * slotsync.c
3 * Functionality for synchronizing slots to a standby server from the
4 * primary server.
5 *
6 * Copyright (c) 2024-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/logical/slotsync.c
10 *
11 * This file contains the code for slot synchronization on a physical standby
12 * to fetch logical failover slots information from the primary server, create
13 * the slots on the standby and synchronize them periodically.
14 *
15 * Slot synchronization can be performed either automatically by enabling slot
16 * sync worker or manually by calling SQL function pg_sync_replication_slots().
17 *
18 * If the WAL corresponding to the remote's restart_lsn is not available on the
19 * physical standby or the remote's catalog_xmin precedes the oldest xid for
20 * which it is guaranteed that rows wouldn't have been removed then we cannot
21 * create the local standby slot because that would mean moving the local slot
22 * backward and decoding won't be possible via such a slot. In this case, the
23 * slot will be marked as RS_TEMPORARY. Once the primary server catches up,
24 * the slot will be marked as RS_PERSISTENT (which means sync-ready) after
25 * which slot sync worker can perform the sync periodically or user can call
26 * pg_sync_replication_slots() periodically to perform the syncs.
27 *
28 * If synchronized slots fail to build a consistent snapshot from the
29 * restart_lsn before reaching confirmed_flush_lsn, they would become
30 * unreliable after promotion due to potential data loss from changes
31 * before reaching a consistent point. This can happen because the slots can
32 * be synced at some random time and we may not reach the consistent point
33 * at the same WAL location as the primary. So, we mark such slots as
34 * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
35 * consistent point, they will be marked as RS_PERSISTENT.
36 *
37 * The slot sync worker waits for some time before the next synchronization,
38 * with the duration varying based on whether any slots were updated during
39 * the last cycle. Refer to the comments above wait_for_slot_activity() for
40 * more details.
41 *
42 * Any standby synchronized slots will be dropped if they no longer need
43 * to be synchronized. See comment atop drop_local_obsolete_slots() for more
44 * details.
45 *---------------------------------------------------------------------------
46 */
47
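/*
 * Illustrative usage (a minimal sketch; slot and plugin names are
 * placeholders, and it assumes a standby already replicating from the
 * primary with primary_conninfo, primary_slot_name and
 * hot_standby_feedback configured):
 *
 *   -- on the primary: create a failover-enabled logical slot
 *   SELECT pg_create_logical_replication_slot('myslot', 'pgoutput',
 *                                              false, false, true);
 *
 *   -- on the standby: either run the slot sync worker by setting
 *   --   sync_replication_slots = on
 *   -- in postgresql.conf, or trigger a one-off sync manually:
 *   SELECT pg_sync_replication_slots();
 */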
48#include "postgres.h"
49
50#include <time.h>
51
 52#include "access/xlog_internal.h"
 53#include "access/xlogrecovery.h"
54#include "catalog/pg_database.h"
55#include "commands/dbcommands.h"
56#include "libpq/pqsignal.h"
57#include "pgstat.h"
 58#include "postmaster/interrupt.h"
 59#include "replication/logical.h"
 60#include "replication/slotsync.h"
 61#include "replication/snapbuild.h"
62#include "storage/ipc.h"
63#include "storage/lmgr.h"
64#include "storage/proc.h"
65#include "storage/procarray.h"
66#include "tcop/tcopprot.h"
67#include "utils/builtins.h"
68#include "utils/pg_lsn.h"
69#include "utils/ps_status.h"
70#include "utils/timeout.h"
71
72/*
73 * Struct for sharing information to control slot synchronization.
74 *
75 * The slot sync worker's pid is needed by the startup process to shut it
76 * down during promotion. The startup process shuts down the slot sync worker
77 * and also sets stopSignaled=true to handle the race condition when the
78 * postmaster has not noticed the promotion yet and thus may end up restarting
79 * the slot sync worker. If stopSignaled is set, the worker will exit in such a
80 * case. The SQL function pg_sync_replication_slots() will also error out if
81 * this flag is set. Note that we don't need to reset this variable as after
82 * promotion the slot sync worker won't be restarted because the pmState
83 * changes to PM_RUN from PM_HOT_STANDBY and we don't support demoting
84 * primary without restarting the server. See LaunchMissingBackgroundProcesses.
85 *
86 * The 'syncing' flag is needed to prevent concurrent slot syncs to avoid slot
87 * overwrites.
88 *
89 * The 'last_start_time' is needed by postmaster to start the slot sync worker
90 * once per SLOTSYNC_RESTART_INTERVAL_SEC. In cases where an immediate restart
91 * is expected (e.g., slot sync GUCs change), slot sync worker will reset
92 * last_start_time before exiting, so that postmaster can start the worker
93 * without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
94 */
 95typedef struct SlotSyncCtxStruct
 96{
 97 pid_t pid;
 98 bool stopSignaled;
 99 bool syncing;
100 time_t last_start_time;
101 slock_t mutex;
102} SlotSyncCtxStruct;
103
104static SlotSyncCtxStruct *SlotSyncCtx = NULL;
105
106/* GUC variable */
107bool sync_replication_slots = false;
108
109/*
110 * The sleep time (ms) between slot-sync cycles varies dynamically
111 * (within a MIN/MAX range) according to slot activity. See
112 * wait_for_slot_activity() for details.
113 */
114#define MIN_SLOTSYNC_WORKER_NAPTIME_MS 200
115#define MAX_SLOTSYNC_WORKER_NAPTIME_MS 30000 /* 30s */
116
117static long sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS;
118
119/* The restart interval for slot sync work used by postmaster */
120#define SLOTSYNC_RESTART_INTERVAL_SEC 10
121
122/*
123 * Flag to tell if we are syncing replication slots. Unlike the 'syncing' flag
124 * in SlotSyncCtxStruct, this flag is true only if the current process is
125 * performing slot synchronization.
126 */
127static bool syncing_slots = false;
128
129/*
130 * Structure to hold information fetched from the primary server about a logical
131 * replication slot.
132 */
133typedef struct RemoteSlot
134{
135 char *name;
136 char *plugin;
137 char *database;
138 bool two_phase;
139 bool failover;
140 XLogRecPtr restart_lsn;
141 XLogRecPtr confirmed_lsn;
142 XLogRecPtr two_phase_at;
143 TransactionId catalog_xmin;
144
145 /* RS_INVAL_NONE if valid, or the reason of invalidation */
146 ReplicationSlotInvalidationCause invalidated;
147} RemoteSlot;
148
149static void slotsync_failure_callback(int code, Datum arg);
150static void update_synced_slots_inactive_since(void);
151
152/*
153 * If necessary, update the local synced slot's metadata based on the data
154 * from the remote slot.
155 *
156 * If no update was needed (the data of the remote slot is the same as the
157 * local slot) return false, otherwise true.
158 *
159 * *found_consistent_snapshot will be true iff the remote slot's LSN or xmin is
160 * modified, and decoding from the corresponding LSN's can reach a
161 * consistent snapshot.
162 *
163 * *remote_slot_precedes will be true if the remote slot's LSN or xmin
164 * precedes locally reserved position.
165 */
166static bool
167update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
168 bool *found_consistent_snapshot,
169 bool *remote_slot_precedes)
170{
171 ReplicationSlot *slot = MyReplicationSlot;
172 bool updated_xmin_or_lsn = false;
173 bool updated_config = false;
174
175 Assert(slot->data.invalidated == RS_INVAL_NONE);
176
177 if (found_consistent_snapshot)
178 *found_consistent_snapshot = false;
179
180 if (remote_slot_precedes)
181 *remote_slot_precedes = false;
182
183 /*
184 * Don't overwrite if we already have a newer catalog_xmin and
185 * restart_lsn.
186 */
187 if (remote_slot->restart_lsn < slot->data.restart_lsn ||
188 TransactionIdPrecedes(remote_slot->catalog_xmin,
189 slot->data.catalog_xmin))
190 {
191 /*
192 * This can happen in following situations:
193 *
194 * If the slot is temporary, it means either the initial WAL location
195 * reserved for the local slot is ahead of the remote slot's
196 * restart_lsn or the initial xmin_horizon computed for the local slot
197 * is ahead of the remote slot.
198 *
199 * If the slot is persistent, restart_lsn of the synced slot could
200 * still be ahead of the remote slot. Since we use slot advance
201 * functionality to keep snapbuild/slot updated, it is possible that
202 * the restart_lsn is advanced to a later position than it has on the
203 * primary. This can happen when slot advancing machinery finds
204 * running xacts record after reaching the consistent state at a later
205 * point than the primary where it serializes the snapshot and updates
206 * the restart_lsn.
207 *
208 * We LOG the message if the slot is temporary as it can help the user
209 * to understand why the slot is not sync-ready. In the case of a
210 * persistent slot, it would be a more common case and won't directly
211 * impact the users, so we used DEBUG1 level to log the message.
212 */
214 errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot",
215 remote_slot->name),
216 errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.",
217 LSN_FORMAT_ARGS(remote_slot->restart_lsn),
218 remote_slot->catalog_xmin,
220 slot->data.catalog_xmin));
221
222 if (remote_slot_precedes)
223 *remote_slot_precedes = true;
224 }
225
226 /*
227 * Attempt to sync LSNs and xmins only if remote slot is ahead of local
228 * slot.
229 */
230 else if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
231 remote_slot->restart_lsn > slot->data.restart_lsn ||
232 TransactionIdFollows(remote_slot->catalog_xmin,
233 slot->data.catalog_xmin))
234 {
235 /*
236 * We can't directly copy the remote slot's LSN or xmin unless there
237 * exists a consistent snapshot at that point. Otherwise, after
238 * promotion, the slots may not reach a consistent point before the
239 * confirmed_flush_lsn which can lead to a data loss. To avoid data
240 * loss, we let slot machinery advance the slot which ensures that
241 * snapbuilder/slot statuses are updated properly.
242 */
243 if (SnapBuildSnapshotExists(remote_slot->restart_lsn))
244 {
245 /*
246 * Update the slot info directly if there is a serialized snapshot
247 * at the restart_lsn, as the slot can quickly reach consistency
248 * at restart_lsn by restoring the snapshot.
249 */
250 SpinLockAcquire(&slot->mutex);
251 slot->data.restart_lsn = remote_slot->restart_lsn;
252 slot->data.confirmed_flush = remote_slot->confirmed_lsn;
253 slot->data.catalog_xmin = remote_slot->catalog_xmin;
254 SpinLockRelease(&slot->mutex);
255
256 if (found_consistent_snapshot)
257 *found_consistent_snapshot = true;
258 }
259 else
260 {
261 LogicalSlotAdvanceAndCheckSnapState(remote_slot->confirmed_lsn,
262 found_consistent_snapshot);
263
264 /* Sanity check */
265 if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
266 ereport(ERROR,
267 errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
268 remote_slot->name),
269 errdetail_internal("Remote slot has LSN %X/%X but local slot has LSN %X/%X.",
270 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
271 LSN_FORMAT_ARGS(slot->data.confirmed_flush)));
272 }
273
274 updated_xmin_or_lsn = true;
275 }
276
277 if (remote_dbid != slot->data.database ||
278 remote_slot->two_phase != slot->data.two_phase ||
279 remote_slot->failover != slot->data.failover ||
280 strcmp(remote_slot->plugin, NameStr(slot->data.plugin)) != 0 ||
281 remote_slot->two_phase_at != slot->data.two_phase_at)
282 {
283 NameData plugin_name;
284
285 /* Avoid expensive operations while holding a spinlock. */
286 namestrcpy(&plugin_name, remote_slot->plugin);
287
288 SpinLockAcquire(&slot->mutex);
289 slot->data.plugin = plugin_name;
290 slot->data.database = remote_dbid;
291 slot->data.two_phase = remote_slot->two_phase;
292 slot->data.two_phase_at = remote_slot->two_phase_at;
293 slot->data.failover = remote_slot->failover;
294 SpinLockRelease(&slot->mutex);
295
296 updated_config = true;
297 }
298
299 /*
300 * We have to write the changed xmin to disk *before* we change the
301 * in-memory value, otherwise after a crash we wouldn't know that some
302 * catalog tuples might have been removed already.
303 */
304 if (updated_config || updated_xmin_or_lsn)
305 {
306 ReplicationSlotMarkDirty();
307 ReplicationSlotSave();
308 }
309
310 /*
311 * Now the new xmin is safely on disk, we can let the global value
312 * advance. We do not take ProcArrayLock or similar since we only advance
313 * xmin here and there's not much harm done by a concurrent computation
314 * missing that.
315 */
316 if (updated_xmin_or_lsn)
317 {
318 SpinLockAcquire(&slot->mutex);
319 slot->effective_catalog_xmin = remote_slot->catalog_xmin;
320 SpinLockRelease(&slot->mutex);
321
322 ReplicationSlotsComputeRequiredXmin(false);
323 ReplicationSlotsComputeRequiredLSN();
324 }
325
326 return updated_config || updated_xmin_or_lsn;
327}
328
329/*
330 * Get the list of local logical slots that are synchronized from the
331 * primary server.
332 */
333static List *
334get_local_synced_slots(void)
335{
336 List *local_slots = NIL;
337
338 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
339
340 for (int i = 0; i < max_replication_slots; i++)
341 {
342 ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
343
344 /* Check if it is a synchronized slot */
345 if (s->in_use && s->data.synced)
346 {
347 Assert(SlotIsLogical(s));
348 local_slots = lappend(local_slots, s);
349 }
350 }
351
352 LWLockRelease(ReplicationSlotControlLock);
353
354 return local_slots;
355}
356
357/*
358 * Helper function to check if local_slot is required to be retained.
359 *
360 * Return false either if local_slot does not exist in the remote_slots list
361 * or is invalidated while the corresponding remote slot is still valid,
362 * otherwise true.
363 */
364static bool
365local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
366{
367 bool remote_exists = false;
368 bool locally_invalidated = false;
369
370 foreach_ptr(RemoteSlot, remote_slot, remote_slots)
371 {
372 if (strcmp(remote_slot->name, NameStr(local_slot->data.name)) == 0)
373 {
374 remote_exists = true;
375
376 /*
377 * If remote slot is not invalidated but local slot is marked as
378 * invalidated, then set locally_invalidated flag.
379 */
380 SpinLockAcquire(&local_slot->mutex);
381 locally_invalidated =
382 (remote_slot->invalidated == RS_INVAL_NONE) &&
383 (local_slot->data.invalidated != RS_INVAL_NONE);
384 SpinLockRelease(&local_slot->mutex);
385
386 break;
387 }
388 }
389
390 return (remote_exists && !locally_invalidated);
391}
392
393/*
394 * Drop local obsolete slots.
395 *
396 * Drop the local slots that no longer need to be synced i.e. these either do
397 * not exist on the primary or are no longer enabled for failover.
398 *
399 * Additionally, drop any slots that are valid on the primary but got
400 * invalidated on the standby. This situation may occur due to the following
401 * reasons:
402 * - The 'max_slot_wal_keep_size' on the standby is insufficient to retain WAL
403 * records from the restart_lsn of the slot.
404 * - 'primary_slot_name' is temporarily reset to null and the physical slot is
405 * removed.
406 * These dropped slots will get recreated in next sync-cycle and it is okay to
407 * drop and recreate such slots as long as these are not consumable on the
408 * standby (which is the case currently).
409 *
410 * Note: Change of 'wal_level' on the primary server to a level lower than
411 * logical may also result in slot invalidation and removal on the standby.
412 * This is because such 'wal_level' change is only possible if the logical
413 * slots are removed on the primary server, so it's expected to see the
414 * slots being invalidated and removed on the standby too (and re-created
415 * if they are re-created on the primary server).
416 */
417static void
418drop_local_obsolete_slots(List *remote_slot_list)
419{
420 List *local_slots = get_local_synced_slots();
421
422 foreach_ptr(ReplicationSlot, local_slot, local_slots)
423 {
424 /* Drop the local slot if it is not required to be retained. */
425 if (!local_sync_slot_required(local_slot, remote_slot_list))
426 {
427 bool synced_slot;
428
429 /*
430 * Use shared lock to prevent a conflict with
431 * ReplicationSlotsDropDBSlots(), trying to drop the same slot
432 * during a drop-database operation.
433 */
434 LockSharedObject(DatabaseRelationId, local_slot->data.database,
435 0, AccessShareLock);
436
437 /*
438 * In the small window between getting the slot to drop and
439 * locking the database, there is a possibility of a parallel
440 * database drop by the startup process and the creation of a new
441 * slot by the user. This new user-created slot may end up using
442 * the same shared memory as that of 'local_slot'. Thus check if
443 * local_slot is still the synced one before performing actual
444 * drop.
445 */
446 SpinLockAcquire(&local_slot->mutex);
447 synced_slot = local_slot->in_use && local_slot->data.synced;
448 SpinLockRelease(&local_slot->mutex);
449
450 if (synced_slot)
451 {
452 ReplicationSlotAcquire(NameStr(local_slot->data.name), true, false);
453 ReplicationSlotDropAcquired();
454 }
455
456 UnlockSharedObject(DatabaseRelationId, local_slot->data.database,
457 0, AccessShareLock);
458
459 ereport(LOG,
460 errmsg("dropped replication slot \"%s\" of database with OID %u",
461 NameStr(local_slot->data.name),
462 local_slot->data.database));
463 }
464 }
465}
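/*
 * Illustrative scenario (a sketch; 'myslot' is a placeholder): if the slot
 * disappears from the primary, e.g. via
 *
 *   SELECT pg_drop_replication_slot('myslot');
 *
 * then drop_local_obsolete_slots() above removes the corresponding synced
 * slot from the standby in the next sync cycle.
 */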
466
467/*
468 * Reserve WAL for the currently active local slot using the specified WAL
469 * location (restart_lsn).
470 *
471 * If the given WAL location has been removed, reserve WAL using the oldest
472 * existing WAL segment.
473 */
474static void
475reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
476{
477 XLogSegNo oldest_segno;
478 XLogSegNo segno;
479 ReplicationSlot *slot = MyReplicationSlot;
480
481 Assert(slot != NULL);
483
484 while (true)
485 {
486 SpinLockAcquire(&slot->mutex);
487 slot->data.restart_lsn = restart_lsn;
488 SpinLockRelease(&slot->mutex);
489
490 /* Prevent WAL removal as fast as possible */
491 ReplicationSlotsComputeRequiredLSN();
492
493 XLByteToSeg(restart_lsn, segno, wal_segment_size);
494
495 /*
496 * Find the oldest existing WAL segment file.
497 *
498 * Normally, we can determine it by using the last removed segment
499 * number. However, if no WAL segment files have been removed by a
500 * checkpoint since startup, we need to search for the oldest segment
501 * file from the current timeline existing in XLOGDIR.
502 *
503 * XXX: Currently, we are searching for the oldest segment in the
504 * current timeline as there is less chance of the slot's restart_lsn
505 * from being some prior timeline, and even if it happens, in the
506 * worst case, we will wait to sync till the slot's restart_lsn moved
507 * to the current timeline.
508 */
509 oldest_segno = XLogGetLastRemovedSegno() + 1;
510
511 if (oldest_segno == 1)
512 {
513 TimeLineID cur_timeline;
514
515 GetWalRcvFlushRecPtr(NULL, &cur_timeline);
516 oldest_segno = XLogGetOldestSegno(cur_timeline);
517 }
518
519 elog(DEBUG1, "segno: " UINT64_FORMAT " of purposed restart_lsn for the synced slot, oldest_segno: " UINT64_FORMAT " available",
520 segno, oldest_segno);
521
522 /*
523 * If all required WAL is still there, great, otherwise retry. The
524 * slot should prevent further removal of WAL, unless there's a
525 * concurrent ReplicationSlotsComputeRequiredLSN() after we've written
526 * the new restart_lsn above, so normally we should never need to loop
527 * more than twice.
528 */
529 if (segno >= oldest_segno)
530 break;
531
532 /* Retry using the location of the oldest wal segment */
533 XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size, restart_lsn);
534 }
535}
536
537/*
538 * If the remote restart_lsn and catalog_xmin have caught up with the
539 * local ones, then update the LSNs and persist the local synced slot for
540 * future synchronization; otherwise, do nothing.
541 *
542 * Return true if the slot is marked as RS_PERSISTENT (sync-ready), otherwise
543 * false.
544 */
545static bool
546update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
547{
548 ReplicationSlot *slot = MyReplicationSlot;
549 bool found_consistent_snapshot = false;
550 bool remote_slot_precedes = false;
551
552 (void) update_local_synced_slot(remote_slot, remote_dbid,
553 &found_consistent_snapshot,
554 &remote_slot_precedes);
555
556 /*
557 * Check if the primary server has caught up. Refer to the comment atop
558 * the file for details on this check.
559 */
560 if (remote_slot_precedes)
561 {
562 /*
563 * The remote slot didn't catch up to locally reserved position.
564 *
565 * We do not drop the slot because the restart_lsn can be ahead of the
566 * current location when recreating the slot in the next cycle. It may
567 * take more time to create such a slot. Therefore, we keep this slot
568 * and attempt the synchronization in the next cycle.
569 */
570 return false;
571 }
572
573 /*
574 * Don't persist the slot if it cannot reach the consistent point from the
575 * restart_lsn. See comments atop this file.
576 */
577 if (!found_consistent_snapshot)
578 {
579 ereport(LOG,
580 errmsg("could not synchronize replication slot \"%s\"", remote_slot->name),
581 errdetail("Logical decoding could not find consistent point from local slot's LSN %X/%X.",
582 LSN_FORMAT_ARGS(slot->data.restart_lsn)));
583
584 return false;
585 }
586
587 ReplicationSlotPersist();
588
589 ereport(LOG,
590 errmsg("newly created replication slot \"%s\" is sync-ready now",
591 remote_slot->name));
592
593 return true;
594}
595
596/*
597 * Synchronize a single slot to the given position.
598 *
599 * This creates a new slot if there is no existing one and updates the
600 * metadata of the slot as per the data received from the primary server.
601 *
602 * The slot is created as a temporary slot and stays in the same state until the
603 * remote_slot catches up with locally reserved position and local slot is
604 * updated. The slot is then persisted and is considered as sync-ready for
605 * periodic syncs.
606 *
607 * Returns TRUE if the local slot is updated.
608 */
609static bool
610synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
611{
612 ReplicationSlot *slot;
613 XLogRecPtr latestFlushPtr;
614 bool slot_updated = false;
615
616 /*
617 * Make sure that concerned WAL is received and flushed before syncing
618 * slot to target lsn received from the primary server.
619 */
620 latestFlushPtr = GetStandbyFlushRecPtr(NULL);
621 if (remote_slot->confirmed_lsn > latestFlushPtr)
622 {
623 /*
624 * Can get here only if GUC 'synchronized_standby_slots' on the
625 * primary server was not configured correctly.
626 */
627 ereport(AmLogicalSlotSyncWorkerProcess() ? LOG : ERROR,
628 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
629 errmsg("skipping slot synchronization because the received slot sync"
630 " LSN %X/%X for slot \"%s\" is ahead of the standby position %X/%X",
631 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
632 remote_slot->name,
633 LSN_FORMAT_ARGS(latestFlushPtr)));
634
635 return false;
636 }
637
638 /* Search for the named slot */
639 if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
640 {
641 bool synced;
642
643 SpinLockAcquire(&slot->mutex);
644 synced = slot->data.synced;
645 SpinLockRelease(&slot->mutex);
646
647 /* User-created slot with the same name exists, raise ERROR. */
648 if (!synced)
649 ereport(ERROR,
650 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
651 errmsg("exiting from slot synchronization because same"
652 " name slot \"%s\" already exists on the standby",
653 remote_slot->name));
654
655 /*
656 * The slot has been synchronized before.
657 *
658 * It is important to acquire the slot here before checking
659 * invalidation. If we don't acquire the slot first, there could be a
660 * race condition that the local slot could be invalidated just after
661 * checking the 'invalidated' flag here and we could end up
662 * overwriting 'invalidated' flag to remote_slot's value. See
663 * InvalidatePossiblyObsoleteSlot() where it invalidates slot directly
664 * if the slot is not acquired by other processes.
665 *
666 * XXX: If it ever turns out that slot acquire/release is costly for
667 * cases when none of the slot properties is changed then we can do a
668 * pre-check to ensure that at least one of the slot properties is
669 * changed before acquiring the slot.
670 */
671 ReplicationSlotAcquire(remote_slot->name, true, false);
672
673 Assert(slot == MyReplicationSlot);
674
675 /*
676 * Copy the invalidation cause from remote only if local slot is not
677 * invalidated locally, we don't want to overwrite existing one.
678 */
679 if (slot->data.invalidated == RS_INVAL_NONE &&
680 remote_slot->invalidated != RS_INVAL_NONE)
681 {
682 SpinLockAcquire(&slot->mutex);
683 slot->data.invalidated = remote_slot->invalidated;
684 SpinLockRelease(&slot->mutex);
685
686 /* Make sure the invalidated state persists across server restart */
687 ReplicationSlotMarkDirty();
688 ReplicationSlotSave();
689
690 slot_updated = true;
691 }
692
693 /* Skip the sync of an invalidated slot */
694 if (slot->data.invalidated != RS_INVAL_NONE)
695 {
696 ReplicationSlotRelease();
697 return slot_updated;
698 }
699
700 /* Slot not ready yet, let's attempt to make it sync-ready now. */
701 if (slot->data.persistency == RS_TEMPORARY)
702 {
703 slot_updated = update_and_persist_local_synced_slot(remote_slot,
704 remote_dbid);
705 }
706
707 /* Slot ready for sync, so sync it. */
708 else
709 {
710 /*
711 * Sanity check: As long as the invalidations are handled
712 * appropriately as above, this should never happen.
713 *
714 * We don't need to check restart_lsn here. See the comments in
715 * update_local_synced_slot() for details.
716 */
717 if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
718 ereport(ERROR,
719 errmsg_internal("cannot synchronize local slot \"%s\"",
720 remote_slot->name),
721 errdetail_internal("Local slot's start streaming location LSN(%X/%X) is ahead of remote slot's LSN(%X/%X).",
722 LSN_FORMAT_ARGS(slot->data.confirmed_flush),
723 LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
724
725 slot_updated = update_local_synced_slot(remote_slot, remote_dbid,
726 NULL, NULL);
727 }
728 }
729 /* Otherwise create the slot first. */
730 else
731 {
732 NameData plugin_name;
733 TransactionId xmin_horizon = InvalidTransactionId;
734
735 /* Skip creating the local slot if remote_slot is invalidated already */
736 if (remote_slot->invalidated != RS_INVAL_NONE)
737 return false;
738
739 /*
740 * We create temporary slots instead of ephemeral slots here because
741 * we want the slots to survive after releasing them. This is done to
742 * avoid dropping and re-creating the slots in each synchronization
743 * cycle if the restart_lsn or catalog_xmin of the remote slot has not
744 * caught up.
745 */
746 ReplicationSlotCreate(remote_slot->name, true, RS_TEMPORARY,
747 remote_slot->two_phase,
748 remote_slot->failover,
749 true);
750
751 /* For shorter lines. */
752 slot = MyReplicationSlot;
753
754 /* Avoid expensive operations while holding a spinlock. */
755 namestrcpy(&plugin_name, remote_slot->plugin);
756
757 SpinLockAcquire(&slot->mutex);
758 slot->data.database = remote_dbid;
759 slot->data.plugin = plugin_name;
760 SpinLockRelease(&slot->mutex);
761
762 reserve_wal_for_local_slot(remote_slot->restart_lsn);
763
764 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
765 xmin_horizon = GetOldestSafeDecodingTransactionId(true);
766 SpinLockAcquire(&slot->mutex);
767 slot->effective_catalog_xmin = xmin_horizon;
768 slot->data.catalog_xmin = xmin_horizon;
769 SpinLockRelease(&slot->mutex);
770 ReplicationSlotsComputeRequiredXmin(true);
771 LWLockRelease(ProcArrayLock);
772
773 update_and_persist_local_synced_slot(remote_slot, remote_dbid);
774
775 slot_updated = true;
776 }
777
778 ReplicationSlotRelease();
779
780 return slot_updated;
781}
782
783/*
784 * Synchronize slots.
785 *
786 * Gets the failover logical slots info from the primary server and updates
787 * the slots locally. Creates the slots if not present on the standby.
788 *
789 * Returns TRUE if any of the slots gets updated in this sync-cycle.
790 */
791static bool
792synchronize_slots(WalReceiverConn *wrconn)
793{
794#define SLOTSYNC_COLUMN_COUNT 10
795 Oid slotRow[SLOTSYNC_COLUMN_COUNT] = {TEXTOID, TEXTOID, LSNOID,
796 LSNOID, XIDOID, BOOLOID, LSNOID, BOOLOID, TEXTOID, TEXTOID};
797
798 WalRcvExecResult *res;
799 TupleTableSlot *tupslot;
800 List *remote_slot_list = NIL;
801 bool some_slot_updated = false;
802 bool started_tx = false;
803 const char *query = "SELECT slot_name, plugin, confirmed_flush_lsn,"
804 " restart_lsn, catalog_xmin, two_phase, two_phase_at, failover,"
805 " database, invalidation_reason"
806 " FROM pg_catalog.pg_replication_slots"
807 " WHERE failover and NOT temporary";
808
809 /* The syscache access in walrcv_exec() needs a transaction env. */
810 if (!IsTransactionState())
811 {
812 StartTransactionCommand();
813 started_tx = true;
814 }
815
816 /* Execute the query */
817 res = walrcv_exec(wrconn, query, SLOTSYNC_COLUMN_COUNT, slotRow);
818 if (res->status != WALRCV_OK_TUPLES)
819 ereport(ERROR,
820 errmsg("could not fetch failover logical slots info from the primary server: %s",
821 res->err));
822
823 /* Construct the remote_slot tuple and synchronize each slot locally */
824 tupslot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
825 while (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
826 {
827 bool isnull;
828 RemoteSlot *remote_slot = palloc0(sizeof(RemoteSlot));
829 Datum d;
830 int col = 0;
831
832 remote_slot->name = TextDatumGetCString(slot_getattr(tupslot, ++col,
833 &isnull));
834 Assert(!isnull);
835
836 remote_slot->plugin = TextDatumGetCString(slot_getattr(tupslot, ++col,
837 &isnull));
838 Assert(!isnull);
839
840 /*
841 * It is possible to get null values for LSN and Xmin if slot is
842 * invalidated on the primary server, so handle accordingly.
843 */
844 d = slot_getattr(tupslot, ++col, &isnull);
845 remote_slot->confirmed_lsn = isnull ? InvalidXLogRecPtr :
846 DatumGetLSN(d);
847
848 d = slot_getattr(tupslot, ++col, &isnull);
849 remote_slot->restart_lsn = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
850
851 d = slot_getattr(tupslot, ++col, &isnull);
852 remote_slot->catalog_xmin = isnull ? InvalidTransactionId :
853 DatumGetTransactionId(d);
854
855 remote_slot->two_phase = DatumGetBool(slot_getattr(tupslot, ++col,
856 &isnull));
857 Assert(!isnull);
858
859 d = slot_getattr(tupslot, ++col, &isnull);
860 remote_slot->two_phase_at = isnull ? InvalidXLogRecPtr : DatumGetLSN(d);
861
862 remote_slot->failover = DatumGetBool(slot_getattr(tupslot, ++col,
863 &isnull));
864 Assert(!isnull);
865
866 remote_slot->database = TextDatumGetCString(slot_getattr(tupslot,
867 ++col, &isnull));
868 Assert(!isnull);
869
870 d = slot_getattr(tupslot, ++col, &isnull);
871 remote_slot->invalidated = isnull ? RS_INVAL_NONE :
872 GetSlotInvalidationCause(TextDatumGetCString(d));
873
874 /* Sanity check */
875 Assert(col == SLOTSYNC_COLUMN_COUNT);
876
877 /*
878 * If restart_lsn, confirmed_lsn or catalog_xmin is invalid but the
879 * slot is valid, that means we have fetched the remote_slot in its
880 * RS_EPHEMERAL state. In such a case, don't sync it; we can always
881 * sync it in the next sync cycle when the remote_slot is persisted
882 * and has valid lsn(s) and xmin values.
883 *
884 * XXX: In future, if we plan to expose 'slot->data.persistency' in
885 * pg_replication_slots view, then we can avoid fetching RS_EPHEMERAL
886 * slots in the first place.
887 */
888 if ((XLogRecPtrIsInvalid(remote_slot->restart_lsn) ||
889 XLogRecPtrIsInvalid(remote_slot->confirmed_lsn) ||
890 !TransactionIdIsValid(remote_slot->catalog_xmin)) &&
891 remote_slot->invalidated == RS_INVAL_NONE)
892 pfree(remote_slot);
893 else
894 /* Create list of remote slots */
895 remote_slot_list = lappend(remote_slot_list, remote_slot);
896
897 ExecClearTuple(tupslot);
898 }
899
900 /* Drop local slots that no longer need to be synced. */
901 drop_local_obsolete_slots(remote_slot_list);
902
903 /* Now sync the slots locally */
904 foreach_ptr(RemoteSlot, remote_slot, remote_slot_list)
905 {
906 Oid remote_dbid = get_database_oid(remote_slot->database, false);
907
908 /*
909 * Use shared lock to prevent a conflict with
910 * ReplicationSlotsDropDBSlots(), trying to drop the same slot during
911 * a drop-database operation.
912 */
913 LockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
914
915 some_slot_updated |= synchronize_one_slot(remote_slot, remote_dbid);
916
917 UnlockSharedObject(DatabaseRelationId, remote_dbid, 0, AccessShareLock);
918 }
919
920 /* We are done, free remote_slot_list elements */
921 list_free_deep(remote_slot_list);
922
923 walrcv_clear_result(res);
924
925 if (started_tx)
926 CommitTransactionCommand();
927
928 return some_slot_updated;
929}
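/*
 * Illustrative check (a sketch): on the standby, the state of the
 * synchronized slots can be observed through pg_replication_slots, e.g.
 *
 *   SELECT slot_name, synced, temporary, invalidation_reason
 *     FROM pg_replication_slots;
 *
 * A sync-ready (RS_PERSISTENT) slot shows synced = true and
 * temporary = false.
 */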
930
931/*
932 * Checks the remote server info.
933 *
934 * We ensure that the 'primary_slot_name' exists on the remote server and the
935 * remote server is not a standby node.
936 */
937static void
938validate_remote_info(WalReceiverConn *wrconn)
939{
940#define PRIMARY_INFO_OUTPUT_COL_COUNT 2
941 WalRcvExecResult *res;
942 Oid slotRow[PRIMARY_INFO_OUTPUT_COL_COUNT] = {BOOLOID, BOOLOID};
943 StringInfoData cmd;
944 bool isnull;
945 TupleTableSlot *tupslot;
946 bool remote_in_recovery;
947 bool primary_slot_valid;
948 bool started_tx = false;
949
950 initStringInfo(&cmd);
951 appendStringInfo(&cmd,
952 "SELECT pg_is_in_recovery(), count(*) = 1"
953 " FROM pg_catalog.pg_replication_slots"
954 " WHERE slot_type='physical' AND slot_name=%s",
955 quote_literal_cstr(PrimarySlotName));
956
957 /* The syscache access in walrcv_exec() needs a transaction env. */
958 if (!IsTransactionState())
959 {
960 StartTransactionCommand();
961 started_tx = true;
962 }
963
964 res = walrcv_exec(wrconn, cmd.data, PRIMARY_INFO_OUTPUT_COL_COUNT, slotRow);
965 pfree(cmd.data);
966
967 if (res->status != WALRCV_OK_TUPLES)
968 ereport(ERROR,
969 errmsg("could not fetch primary slot name \"%s\" info from the primary server: %s",
970 PrimarySlotName, res->err),
971 errhint("Check if \"primary_slot_name\" is configured correctly."));
972
973 tupslot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
974 if (!tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
975 elog(ERROR,
976 "failed to fetch tuple for the primary server slot specified by \"primary_slot_name\"");
977
978 remote_in_recovery = DatumGetBool(slot_getattr(tupslot, 1, &isnull));
979 Assert(!isnull);
980
981 /*
982 * Slot sync is currently not supported on a cascading standby. This is
983 * because if we allow it, the primary server needs to wait for all the
984 * cascading standbys, otherwise, logical subscribers can still be ahead
985 * of one of the cascading standbys which we plan to promote. Thus, to
986 * avoid this additional complexity, we restrict it for the time being.
987 */
988 if (remote_in_recovery)
989 ereport(ERROR,
990 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
991 errmsg("cannot synchronize replication slots from a standby server"));
992
993 primary_slot_valid = DatumGetBool(slot_getattr(tupslot, 2, &isnull));
994 Assert(!isnull);
995
996 if (!primary_slot_valid)
997 ereport(ERROR,
998 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
999 /* translator: second %s is a GUC variable name */
1000 errmsg("replication slot \"%s\" specified by \"%s\" does not exist on primary server",
1001 PrimarySlotName, "primary_slot_name"));
1002
1003 ExecClearTuple(tupslot);
1004 walrcv_clear_result(res);
1005
1006 if (started_tx)
1007 CommitTransactionCommand();
1008}
1009
1010/*
1011 * Checks if dbname is specified in 'primary_conninfo'.
1012 *
1013 * Error out if not specified otherwise return it.
1014 */
1015char *
1016CheckAndGetDbnameFromConninfo(void)
1017{
1018 char *dbname;
1019
1020 /*
1021 * The slot synchronization needs a database connection for walrcv_exec to
1022 * work.
1023 */
1024 dbname = walrcv_get_dbname_from_conninfo(PrimaryConnInfo);
1025 if (dbname == NULL)
1026 ereport(ERROR,
1027 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1028
1029 /*
1030 * translator: first %s is a connection option; second %s is a GUC
1031 * variable name
1032 */
1033 errmsg("replication slot synchronization requires \"%s\" to be specified in \"%s\"",
1034 "dbname", "primary_conninfo"));
1035 return dbname;
1036}
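/*
 * Illustrative setting (a sketch; host, user and database names are
 * placeholders): the dbname checked above comes from primary_conninfo on
 * the standby, e.g.
 *
 *   primary_conninfo = 'host=primary user=repluser dbname=postgres'
 */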
1037
1038/*
1039 * Return true if all necessary GUCs for slot synchronization are set
1040 * appropriately, otherwise, return false.
1041 */
1042bool
1043ValidateSlotSyncParams(int elevel)
1044{
1045 /*
1046 * Logical slot sync/creation requires wal_level >= logical.
1047 *
1048 * Since altering the wal_level requires a server restart, so error out in
1049 * this case regardless of elevel provided by caller.
1050 */
1051 if (wal_level < WAL_LEVEL_LOGICAL)
1052 ereport(ERROR,
1053 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1054 errmsg("replication slot synchronization requires \"wal_level\" >= \"logical\""));
1055
1056 /*
1057 * A physical replication slot(primary_slot_name) is required on the
1058 * primary to ensure that the rows needed by the standby are not removed
1059 * after restarting, so that the synchronized slot on the standby will not
1060 * be invalidated.
1061 */
1062 if (PrimarySlotName == NULL || *PrimarySlotName == '\0')
1063 {
1064 ereport(elevel,
1065 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1066 /* translator: %s is a GUC variable name */
1067 errmsg("replication slot synchronization requires \"%s\" to be set", "primary_slot_name"));
1068 return false;
1069 }
1070
1071 /*
1072 * hot_standby_feedback must be enabled to cooperate with the physical
1073 * replication slot, which allows informing the primary about the xmin and
1074 * catalog_xmin values on the standby.
1075 */
1076 if (!hot_standby_feedback)
1077 {
1078 ereport(elevel,
1079 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1080 /* translator: %s is a GUC variable name */
1081 errmsg("replication slot synchronization requires \"%s\" to be enabled",
1082 "hot_standby_feedback"));
1083 return false;
1084 }
1085
1086 /*
1087 * The primary_conninfo is required to make connection to primary for
1088 * getting slots information.
1089 */
1090 if (PrimaryConnInfo == NULL || *PrimaryConnInfo == '\0')
1091 {
1092 ereport(elevel,
1093 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1094 /* translator: %s is a GUC variable name */
1095 errmsg("replication slot synchronization requires \"%s\" to be set",
1096 "primary_conninfo"));
1097 return false;
1098 }
1099
1100 return true;
1101}
1102
1103/*
1104 * Re-read the config file.
1105 *
1106 * Exit if any of the slot sync GUCs have changed. The postmaster will
1107 * restart it.
1108 */
1109static void
1110slotsync_reread_config(void)
1111{
1112 char *old_primary_conninfo = pstrdup(PrimaryConnInfo);
1113 char *old_primary_slotname = pstrdup(PrimarySlotName);
1114 bool old_sync_replication_slots = sync_replication_slots;
1115 bool old_hot_standby_feedback = hot_standby_feedback;
1116 bool conninfo_changed;
1117 bool primary_slotname_changed;
1118
1119 Assert(sync_replication_slots);
1120
1121 ConfigReloadPending = false;
1122 ProcessConfigFile(PGC_SIGHUP);
1123
1124 conninfo_changed = strcmp(old_primary_conninfo, PrimaryConnInfo) != 0;
1125 primary_slotname_changed = strcmp(old_primary_slotname, PrimarySlotName) != 0;
1126 pfree(old_primary_conninfo);
1127 pfree(old_primary_slotname);
1128
1129 if (old_sync_replication_slots != sync_replication_slots)
1130 {
1131 ereport(LOG,
1132 /* translator: %s is a GUC variable name */
1133 errmsg("replication slot synchronization worker will shut down because \"%s\" is disabled", "sync_replication_slots"));
1134 proc_exit(0);
1135 }
1136
1137 if (conninfo_changed ||
1138 primary_slotname_changed ||
1139 (old_hot_standby_feedback != hot_standby_feedback))
1140 {
1141 ereport(LOG,
1142 errmsg("replication slot synchronization worker will restart because of a parameter change"));
1143
1144 /*
1145 * Reset the last-start time for this worker so that the postmaster
1146 * can restart it without waiting for SLOTSYNC_RESTART_INTERVAL_SEC.
1147 */
1148 SlotSyncCtx->last_start_time = 0;
1149
1150 proc_exit(0);
1151 }
1152
1153}
1154
1155/*
1156 * Interrupt handler for main loop of slot sync worker.
1157 */
1158static void
1159ProcessSlotSyncInterrupts(WalReceiverConn *wrconn)
1160{
1161 CHECK_FOR_INTERRUPTS();
1162
1163 if (ShutdownRequestPending)
1164 {
1165 ereport(LOG,
1166 errmsg("replication slot synchronization worker is shutting down on receiving SIGINT"));
1167
1168 proc_exit(0);
1169 }
1170
1171 if (ConfigReloadPending)
1172 slotsync_reread_config();
1173}
1174
1175/*
1176 * Connection cleanup function for slotsync worker.
1177 *
1178 * Called on slotsync worker exit.
1179 */
1180static void
1181slotsync_worker_disconnect(int code, Datum arg)
1182{
1183 WalReceiverConn *wrconn = (WalReceiverConn *) DatumGetPointer(arg);
1184
1185 walrcv_disconnect(wrconn);
1186}
1187
1188/*
1189 * Cleanup function for slotsync worker.
1190 *
1191 * Called on slotsync worker exit.
1192 */
1193static void
1194slotsync_worker_onexit(int code, Datum arg)
1195{
1196 /*
1197 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1198 *
1199 * The startup process during promotion invokes ShutDownSlotSync() which
1200 * waits for slot sync to finish and it does that by checking the
1201 * 'syncing' flag. Thus the slot sync worker must be done with slots'
1202 * release and cleanup to avoid any dangling temporary slots or active
1203 * slots before it marks itself as finished syncing.
1204 */
1205
1206 /* Make sure active replication slots are released */
1207 if (MyReplicationSlot != NULL)
1208 ReplicationSlotRelease();
1209
1210 /* Also cleanup the temporary slots. */
1211 ReplicationSlotCleanup(true);
1212
1213 SpinLockAcquire(&SlotSyncCtx->mutex);
1214
1215 SlotSyncCtx->pid = InvalidPid;
1216
1217 /*
1218 * If syncing_slots is true, it indicates that the process errored out
1219 * without resetting the flag. So, we need to clean up shared memory and
1220 * reset the flag here.
1221 */
1222 if (syncing_slots)
1223 {
1224 SlotSyncCtx->syncing = false;
1225 syncing_slots = false;
1226 }
1227
1228 SpinLockRelease(&SlotSyncCtx->mutex);
1229}
1230
1231/*
1232 * Sleep for long enough that we believe it's likely that the slots on primary
1233 * get updated.
1234 *
1235 * If there is no slot activity the wait time between sync-cycles will double
1236 * (to a maximum of 30s). If there is some slot activity the wait time between
1237 * sync-cycles is reset to the minimum (200ms).
1238 */
1239static void
1240wait_for_slot_activity(bool some_slot_updated)
1241{
1242 int rc;
1243
1244 if (!some_slot_updated)
1245 {
1246 /*
1247 * No slots were updated, so double the sleep time, but not beyond the
1248 * maximum allowable value.
1249 */
1250 sleep_ms = Min(sleep_ms * 2, MAX_SLOTSYNC_WORKER_NAPTIME_MS);
1251 }
1252 else
1253 {
1254 /*
1255 * Some slots were updated since the last sleep, so reset the sleep
1256 * time.
1257 */
1258 sleep_ms = MIN_SLOTSYNC_WORKER_NAPTIME_MS;
1259 }
1260
1261 rc = WaitLatch(MyLatch,
1262 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1263 sleep_ms,
1264 WAIT_EVENT_REPLICATION_SLOTSYNC_MAIN);
1265
1266 if (rc & WL_LATCH_SET)
1267 ResetLatch(MyLatch);
1268}
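/*
 * Worked example of the backoff above: with no slot activity, the naptime
 * grows 200ms -> 400ms -> 800ms -> ... up to the 30s cap; as soon as a
 * cycle updates any slot, it drops back to 200ms.
 */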
1269
1270/*
1271 * Emit an error if a promotion or a concurrent sync call is in progress.
1272 * Otherwise, advertise that a sync is in progress.
1273 */
1274static void
1275check_and_set_sync_info(pid_t worker_pid)
1276{
1277 SpinLockAcquire(&SlotSyncCtx->mutex);
1278
1279 /* The worker pid must not be already assigned in SlotSyncCtx */
1280 Assert(worker_pid == InvalidPid || SlotSyncCtx->pid == InvalidPid);
1281
1282 /*
1283 * Emit an error if startup process signaled the slot sync machinery to
1284 * stop. See comments atop SlotSyncCtxStruct.
1285 */
1286 if (SlotSyncCtx->stopSignaled)
1287 {
1288 SpinLockRelease(&SlotSyncCtx->mutex);
1289 ereport(ERROR,
1290 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1291 errmsg("cannot synchronize replication slots when standby promotion is ongoing"));
1292 }
1293
1294 if (SlotSyncCtx->syncing)
1295 {
1296 SpinLockRelease(&SlotSyncCtx->mutex);
1297 ereport(ERROR,
1298 errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1299 errmsg("cannot synchronize replication slots concurrently"));
1300 }
1301
1302 SlotSyncCtx->syncing = true;
1303
1304 /*
1305 * Advertise the required PID so that the startup process can kill the
1306 * slot sync worker on promotion.
1307 */
1308 SlotSyncCtx->pid = worker_pid;
1309
1310 SpinLockRelease(&SlotSyncCtx->mutex);
1311
1312 syncing_slots = true;
1313}
1314
1315/*
1316 * Reset syncing flag.
1317 */
1318static void
1319reset_syncing_flag()
1320{
1321 SpinLockAcquire(&SlotSyncCtx->mutex);
1322 SlotSyncCtx->syncing = false;
1323 SpinLockRelease(&SlotSyncCtx->mutex);
1324
1325 syncing_slots = false;
1326};
1327
1328/*
1329 * The main loop of our worker process.
1330 *
1331 * It connects to the primary server, fetches logical failover slots
1332 * information periodically in order to create and sync the slots.
1333 */
1334void
1335ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
1336{
1337 WalReceiverConn *wrconn = NULL;
1338 char *dbname;
1339 char *err;
1340 sigjmp_buf local_sigjmp_buf;
1341 StringInfoData app_name;
1342
1343 Assert(startup_data_len == 0);
1344
1345 MyBackendType = B_SLOTSYNC_WORKER;
1346
1347 init_ps_display(NULL);
1348
1349 SetProcessingMode(InitProcessing);
1350
1351 /*
1352 * Create a per-backend PGPROC struct in shared memory. We must do this
1353 * before we access any shared memory.
1354 */
1355 InitProcess();
1356
1357 /*
1358 * Early initialization.
1359 */
1360 BaseInit();
1361
1362 Assert(SlotSyncCtx != NULL);
1363
1364 /*
1365 * If an exception is encountered, processing resumes here.
1366 *
1367 * We just need to clean up, report the error, and go away.
1368 *
1369 * If we do not have this handling here, then since this worker process
1370 * operates at the bottom of the exception stack, ERRORs turn into FATALs.
1371 * Therefore, we create our own exception handler to catch ERRORs.
1372 */
1373 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
1374 {
1375 /* since not using PG_TRY, must reset error stack by hand */
1376 error_context_stack = NULL;
1377
1378 /* Prevents interrupts while cleaning up */
1379 HOLD_INTERRUPTS();
1380
1381 /* Report the error to the server log */
1382 EmitErrorReport();
1383
1384 /*
1385 * We can now go away. Note that because we called InitProcess, a
1386 * callback was registered to do ProcKill, which will clean up
1387 * necessary state.
1388 */
1389 proc_exit(0);
1390 }
1391
1392 /* We can now handle ereport(ERROR) */
1393 PG_exception_stack = &local_sigjmp_buf;
1394
1395 /* Setup signal handling */
1396 pqsignal(SIGHUP, SignalHandlerForConfigReload);
1397 pqsignal(SIGINT, SignalHandlerForShutdownRequest);
1398 pqsignal(SIGTERM, die);
1399 pqsignal(SIGFPE, FloatExceptionHandler);
1400 pqsignal(SIGUSR1, procsignal_sigusr1_handler);
1401 pqsignal(SIGUSR2, SIG_IGN);
1402 pqsignal(SIGPIPE, SIG_IGN);
1403 pqsignal(SIGCHLD, SIG_DFL);
1404
1404
1405 check_and_set_sync_info(MyProcPid);
1406
1407 ereport(LOG, errmsg("slot sync worker started"));
1408
1409 /* Register it as soon as SlotSyncCtx->pid is initialized. */
1410 before_shmem_exit(slotsync_worker_onexit, (Datum) 0);
1411
1412 /*
1413 * Establishes SIGALRM handler and initialize timeout module. It is needed
1414 * by InitPostgres to register different timeouts.
1415 */
1416 InitializeTimeouts();
1417
1418 /* Load the libpq-specific functions */
1419 load_file("libpqwalreceiver", false);
1420
1421 /*
1422 * Unblock signals (they were blocked when the postmaster forked us)
1423 */
1424 sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
1425
1426 /*
1427 * Set always-secure search path, so malicious users can't redirect user
1428 * code (e.g. operators).
1429 *
1430 * It's not strictly necessary since we won't be scanning or writing to
1431 * any user table locally, but it's good to retain it here for added
1432 * precaution.
1433 */
1434 SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);
1435
1435
1436 dbname = CheckAndGetDbnameFromConninfo();
1437
1438 /*
1439 * Connect to the database specified by the user in primary_conninfo. We
1440 * need a database connection for walrcv_exec to work which we use to
1441 * fetch slot information from the remote node. See comments atop
1442 * libpqrcv_exec.
1443 *
1444 * We do not specify a specific user here since the slot sync worker will
1445 * operate as a superuser. This is safe because the slot sync worker does
1446 * not interact with user tables, eliminating the risk of executing
1447 * arbitrary code within triggers.
1448 */
1449 InitPostgres(dbname, InvalidOid, NULL, InvalidOid, 0, NULL);
1450
1451 SetProcessingMode(NormalProcessing);
1452
1453 initStringInfo(&app_name);
1454 if (cluster_name[0])
1455 appendStringInfo(&app_name, "%s_%s", cluster_name, "slotsync worker");
1456 else
1457 appendStringInfoString(&app_name, "slotsync worker");
1458
1459 /*
1460 * Establish the connection to the primary server for slot
1461 * synchronization.
1462 */
1463 wrconn = walrcv_connect(PrimaryConnInfo, false, false, false,
1464 app_name.data, &err);
1465 pfree(app_name.data);
1466
1467 if (!wrconn)
1468 ereport(ERROR,
1469 errcode(ERRCODE_CONNECTION_FAILURE),
1470 errmsg("synchronization worker \"%s\" could not connect to the primary server: %s",
1471 app_name.data, err));
1472
1473 /*
1474 * Register the disconnection callback.
1475 *
1476 * XXX: This can be combined with previous cleanup registration of
1477 * slotsync_worker_onexit() but that will need the connection to be made
1478 * global and we want to avoid introducing global for this purpose.
1479 */
1480 before_shmem_exit(slotsync_worker_disconnect, PointerGetDatum(wrconn));
1481
1482 /*
1483 * Using the specified primary server connection, check that we are not a
1484 * cascading standby and slot configured in 'primary_slot_name' exists on
1485 * the primary server.
1486 */
1487 validate_remote_info(wrconn);
1488
1489 /* Main loop to synchronize slots */
1490 for (;;)
1491 {
1492 bool some_slot_updated = false;
1493
1494 ProcessSlotSyncInterrupts(wrconn);
1495
1496 some_slot_updated = synchronize_slots(wrconn);
1497
1498 wait_for_slot_activity(some_slot_updated);
1499 }
1500
1501 /*
1502 * The slot sync worker can't get here because it will only stop when it
1503 * receives a SIGINT from the startup process, or when there is an error.
1504 */
1505 Assert(false);
1506}
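/*
 * Illustrative check (a sketch): once launched, the worker shows up on the
 * standby as its own backend type, e.g.
 *
 *   SELECT pid, backend_type FROM pg_stat_activity
 *    WHERE backend_type = 'slotsync worker';
 */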
1507
1508/*
1509 * Update the inactive_since property for synced slots.
1510 *
1511 * Note that this function is currently called when we shutdown the slot
1512 * sync machinery.
1513 */
1514static void
1515update_synced_slots_inactive_since(void)
1516{
1517 TimestampTz now = 0;
1518
1519 /*
1520 * We need to update inactive_since only when we are promoting standby to
1521 * correctly interpret the inactive_since if the standby gets promoted
1522 * without a restart. We don't want the slots to appear inactive for a
1523 * long time after promotion if they haven't been synchronized recently.
1524 * Whoever acquires the slot, i.e., makes the slot active, will reset it.
1525 */
1526 if (!StandbyMode)
1527 return;
1528
1529 /* The slot sync worker or SQL function mustn't be running by now */
1531
1532 LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
1533
1534 for (int i = 0; i < max_replication_slots; i++)
1535 {
1536 ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
1537
1538 /* Check if it is a synchronized slot */
1539 if (s->in_use && s->data.synced)
1540 {
1541 Assert(SlotIsLogical(s));
1542
1543 /* The slot must not be acquired by any process */
1544 Assert(s->active_pid == 0);
1545
1546 /* Use the same inactive_since time for all the slots. */
1547 if (now == 0)
1548 now = GetCurrentTimestamp();
1549
1550 ReplicationSlotSetInactiveSince(s, now, true);
1551 }
1552 }
1553
1554 LWLockRelease(ReplicationSlotControlLock);
1555}
1556
1557/*
1558 * Shut down the slot sync worker.
1559 *
1560 * This function sends signal to shutdown slot sync worker, if required. It
1561 * also waits till the slot sync worker has exited or
1562 * pg_sync_replication_slots() has finished.
1563 */
1564void
1565ShutDownSlotSync(void)
1566{
1567 pid_t worker_pid;
1568
1569 SpinLockAcquire(&SlotSyncCtx->mutex);
1570
1571 SlotSyncCtx->stopSignaled = true;
1572
1573 /*
1574 * Return if neither the slot sync worker is running nor the function
1575 * pg_sync_replication_slots() is executing.
1576 */
1577 if (!SlotSyncCtx->syncing)
1578 {
1579 SpinLockRelease(&SlotSyncCtx->mutex);
1580 update_synced_slots_inactive_since();
1581 return;
1582 }
1583
1584 worker_pid = SlotSyncCtx->pid;
1585
1586 SpinLockRelease(&SlotSyncCtx->mutex);
1587
1588 if (worker_pid != InvalidPid)
1589 kill(worker_pid, SIGINT);
1590
1591 /* Wait for slot sync to end */
1592 for (;;)
1593 {
1594 int rc;
1595
1596 /* Wait a bit, we don't expect to have to wait long */
1597 rc = WaitLatch(MyLatch,
1598 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1599 10L, WAIT_EVENT_REPLICATION_SLOTSYNC_SHUTDOWN);
1600
1601 if (rc & WL_LATCH_SET)
1602 {
1603 ResetLatch(MyLatch);
1604 CHECK_FOR_INTERRUPTS();
1605 }
1606
1607 SpinLockAcquire(&SlotSyncCtx->mutex);
1608
1609 /* Ensure that no process is syncing the slots. */
1610 if (!SlotSyncCtx->syncing)
1611 break;
1612
1613 SpinLockRelease(&SlotSyncCtx->mutex);
1614 }
1615
1616 SpinLockRelease(&SlotSyncCtx->mutex);
1617
1618 update_synced_slots_inactive_since();
1619}
1620
1621/*
1622 * SlotSyncWorkerCanRestart
1623 *
1624 * Returns true if enough time (SLOTSYNC_RESTART_INTERVAL_SEC) has passed
1625 * since it was launched last. Otherwise returns false.
1626 *
1627 * This is a safety valve to protect against continuous respawn attempts if the
1628 * worker is dying immediately at launch. Note that since we will retry to
1629 * launch the worker from the postmaster main loop, we will get another
1630 * chance later.
1631 */
1632bool
1633SlotSyncWorkerCanRestart(void)
1634{
1635 time_t curtime = time(NULL);
1636
1637 /* Return false if too soon since last start. */
1638 if ((unsigned int) (curtime - SlotSyncCtx->last_start_time) <
1639 (unsigned int) SLOTSYNC_RESTART_INTERVAL_SEC)
1640 return false;
1641
1642 SlotSyncCtx->last_start_time = curtime;
1643
1644 return true;
1645}
1646
1647/*
1648 * Is current process syncing replication slots?
1649 *
1650 * Could be either backend executing SQL function or slot sync worker.
1651 */
1652bool
1653IsSyncingReplicationSlots(void)
1654{
1655 return syncing_slots;
1656}
1657
1658/*
1659 * Amount of shared memory required for slot synchronization.
1660 */
1661Size
1662SlotSyncShmemSize(void)
1663{
1664 return sizeof(SlotSyncCtxStruct);
1665}
1666
1667/*
1668 * Allocate and initialize the shared memory of slot synchronization.
1669 */
1670void
1671SlotSyncShmemInit(void)
1672{
1673 Size size = SlotSyncShmemSize();
1674 bool found;
1675
1676 SlotSyncCtx = (SlotSyncCtxStruct *)
1677 ShmemInitStruct("Slot Sync Data", size, &found);
1678
1679 if (!found)
1680 {
1681 memset(SlotSyncCtx, 0, size);
1682 SlotSyncCtx->pid = InvalidPid;
1683 SpinLockInit(&SlotSyncCtx->mutex);
1684 }
1685}
1686
1687/*
1688 * Error cleanup callback for slot sync SQL function.
1689 */
1690static void
1691slotsync_failure_callback(int code, Datum arg)
1692{
1693 WalReceiverConn *wrconn = (WalReceiverConn *) DatumGetPointer(arg);
1694
1695 /*
1696 * We need to do slots cleanup here just like WalSndErrorCleanup() does.
1697 *
1698 * The startup process during promotion invokes ShutDownSlotSync() which
1699 * waits for slot sync to finish and it does that by checking the
1700 * 'syncing' flag. Thus the SQL function must be done with slots' release
1701 * and cleanup to avoid any dangling temporary slots or active slots
1702 * before it marks itself as finished syncing.
1703 */
1704
1705 /* Make sure active replication slots are released */
1706 if (MyReplicationSlot != NULL)
1707 ReplicationSlotRelease();
1708
1709 /* Also cleanup the synced temporary slots. */
1710 ReplicationSlotCleanup(true);
1711
1712 /*
1713 * The set syncing_slots indicates that the process errored out without
1714 * resetting the flag. So, we need to clean up shared memory and reset the
1715 * flag here.
1716 */
1717 if (syncing_slots)
1718 reset_syncing_flag();
1719
1720 walrcv_disconnect(wrconn);
1721}
1722
1723/*
1724 * Synchronize the failover enabled replication slots using the specified
1725 * primary server connection.
1726 */
1727void
1728SyncReplicationSlots(WalReceiverConn *wrconn)
1729{
1730 PG_ENSURE_ERROR_CLEANUP(slotsync_failure_callback, PointerGetDatum(wrconn));
1731 {
1732 check_and_set_sync_info(InvalidPid);
1733
1734 validate_remote_info(wrconn);
1735
1736 synchronize_slots(wrconn);
1737
1738 /* Cleanup the synced temporary slots */
1739 ReplicationSlotCleanup(true);
1740
1741 /* We are done with sync, so reset sync flag */
1742 reset_syncing_flag();
1743 }
1744 PG_END_ENSURE_ERROR_CLEANUP(slotsync_failure_callback, PointerGetDatum(wrconn));
1745}
sigset_t UnBlockSig
Definition: pqsignal.c:22
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
#define TextDatumGetCString(d)
Definition: builtins.h:98
#define NameStr(name)
Definition: c.h:717
#define Min(x, y)
Definition: c.h:975
#define UINT64_FORMAT
Definition: c.h:521
uint32 TransactionId
Definition: c.h:623
size_t Size
Definition: c.h:576
int64 TimestampTz
Definition: timestamp.h:39
Oid get_database_oid(const char *dbname, bool missing_ok)
Definition: dbcommands.c:3141
void load_file(const char *filename, bool restricted)
Definition: dfmgr.c:134
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1158
void EmitErrorReport(void)
Definition: elog.c:1692
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1231
int errdetail(const char *fmt,...)
Definition: elog.c:1204
ErrorContextCallback * error_context_stack
Definition: elog.c:95
int errhint(const char *fmt,...)
Definition: elog.c:1318
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
sigjmp_buf * PG_exception_stack
Definition: elog.c:97
#define LOG
Definition: elog.h:31
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
void err(int eval, const char *fmt,...)
Definition: err.c:43
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1427
const TupleTableSlotOps TTSOpsMinimalTuple
Definition: execTuples.c:86
int MyProcPid
Definition: globals.c:48
struct Latch * MyLatch
Definition: globals.c:64
void ProcessConfigFile(GucContext context)
Definition: guc-file.l:120
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4332
@ PGC_S_OVERRIDE
Definition: guc.h:123
@ PGC_SUSET
Definition: guc.h:78
@ PGC_SIGHUP
Definition: guc.h:75
char * cluster_name
Definition: guc_tables.c:554
Assert(PointerIsAligned(start, uint64))
void SignalHandlerForShutdownRequest(SIGNAL_ARGS)
Definition: interrupt.c:109
volatile sig_atomic_t ShutdownRequestPending
Definition: interrupt.c:28
volatile sig_atomic_t ConfigReloadPending
Definition: interrupt.c:27
void SignalHandlerForConfigReload(SIGNAL_ARGS)
Definition: interrupt.c:65
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337
void proc_exit(int code)
Definition: ipc.c:104
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
int i
Definition: isn.c:77
void ResetLatch(Latch *latch)
Definition: latch.c:372
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:172
List * lappend(List *list, void *datum)
Definition: list.c:339
void list_free_deep(List *list)
Definition: list.c:1560
void LockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1082
void UnlockSharedObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode)
Definition: lmgr.c:1142
#define AccessShareLock
Definition: lockdefs.h:36
XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto, bool *found_consistent_snapshot)
Definition: logical.c:2044
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1182
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1902
@ LW_SHARED
Definition: lwlock.h:115
@ LW_EXCLUSIVE
Definition: lwlock.h:114
char * pstrdup(const char *in)
Definition: mcxt.c:2325
void pfree(void *pointer)
Definition: mcxt.c:2150
void * palloc0(Size size)
Definition: mcxt.c:1973
@ NormalProcessing
Definition: miscadmin.h:472
@ InitProcessing
Definition: miscadmin.h:471
#define GetProcessingMode()
Definition: miscadmin.h:481
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
#define AmLogicalSlotSyncWorkerProcess()
Definition: miscadmin.h:386
#define HOLD_INTERRUPTS()
Definition: miscadmin.h:134
#define SetProcessingMode(mode)
Definition: miscadmin.h:483
@ B_SLOTSYNC_WORKER
Definition: miscadmin.h:348
#define InvalidPid
Definition: miscadmin.h:32
BackendType MyBackendType
Definition: miscinit.c:64
void namestrcpy(Name name, const char *str)
Definition: name.c:233
void * arg
#define NIL
Definition: pg_list.h:68
#define foreach_ptr(type, var, lst)
Definition: pg_list.h:469
static XLogRecPtr DatumGetLSN(Datum X)
Definition: pg_lsn.h:22
#define die(msg)
#define pqsignal
Definition: port.h:531
void FloatExceptionHandler(SIGNAL_ARGS)
Definition: postgres.c:3075
static bool DatumGetBool(Datum X)
Definition: postgres.h:95
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
static TransactionId DatumGetTransactionId(Datum X)
Definition: postgres.h:267
#define InvalidOid
Definition: postgres_ext.h:35
unsigned int Oid
Definition: postgres_ext.h:30
void BaseInit(void)
Definition: postinit.c:612
void InitPostgres(const char *in_dbname, Oid dboid, const char *username, Oid useroid, bits32 flags, char *out_dbname)
Definition: postinit.c:719
TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly)
Definition: procarray.c:2945
void procsignal_sigusr1_handler(SIGNAL_ARGS)
Definition: procsignal.c:673
void init_ps_display(const char *fixed_part)
Definition: ps_status.c:269
char * quote_literal_cstr(const char *rawstr)
Definition: quote.c:103
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:387
void ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
Definition: slot.c:559
void ReplicationSlotCreate(const char *name, bool db_specific, ReplicationSlotPersistency persistency, bool two_phase, bool failover, bool synced)
Definition: slot.c:324
void ReplicationSlotDropAcquired(void)
Definition: slot.c:919
void ReplicationSlotMarkDirty(void)
Definition: slot.c:1061
ReplicationSlotInvalidationCause GetSlotInvalidationCause(const char *cause_name)
Definition: slot.c:2607
void ReplicationSlotsComputeRequiredXmin(bool already_locked)
Definition: slot.c:1100
void ReplicationSlotPersist(void)
Definition: slot.c:1078
ReplicationSlot * MyReplicationSlot
Definition: slot.c:147
void ReplicationSlotSave(void)
Definition: slot.c:1043
ReplicationSlot * SearchNamedReplicationSlot(const char *name, bool need_lock)
Definition: slot.c:479
void ReplicationSlotRelease(void)
Definition: slot.c:686
int max_replication_slots
Definition: slot.c:150
ReplicationSlotCtlData * ReplicationSlotCtl
Definition: slot.c:144
void ReplicationSlotsComputeRequiredLSN(void)
Definition: slot.c:1156
void ReplicationSlotCleanup(bool synced_only)
Definition: slot.c:775
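A minimal sketch of the acquire / mark-dirty / save / release sequence the slot.c functions above provide, assuming an already-existing slot; the field update itself is left as a placeholder:

#include "postgres.h"
#include "replication/slot.h"
#include "storage/spin.h"

static void
touch_slot_example(const char *slotname)
{
	/* nowait = true, error_if_invalid = false, per the signature above */
	ReplicationSlotAcquire(slotname, true, false);

	/* per-slot fields are changed under the slot's spinlock */
	SpinLockAcquire(&MyReplicationSlot->mutex);
	/* ... adjust MyReplicationSlot->data here ... */
	SpinLockRelease(&MyReplicationSlot->mutex);

	/* write the change out, then let go of the slot */
	ReplicationSlotMarkDirty();
	ReplicationSlotSave();
	ReplicationSlotRelease();
}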
@ RS_TEMPORARY
Definition: slot.h:40
ReplicationSlotInvalidationCause
Definition: slot.h:52
@ RS_INVAL_NONE
Definition: slot.h:53
#define SlotIsLogical(slot)
Definition: slot.h:221
static void ReplicationSlotSetInactiveSince(ReplicationSlot *s, TimestampTz ts, bool acquire_lock)
Definition: slot.h:239
static List * get_local_synced_slots(void)
Definition: slotsync.c:334
#define MIN_SLOTSYNC_WORKER_NAPTIME_MS
Definition: slotsync.c:114
#define PRIMARY_INFO_OUTPUT_COL_COUNT
static void slotsync_worker_disconnect(int code, Datum arg)
Definition: slotsync.c:1181
void SyncReplicationSlots(WalReceiverConn *wrconn)
Definition: slotsync.c:1728
static bool local_sync_slot_required(ReplicationSlot *local_slot, List *remote_slots)
Definition: slotsync.c:365
static void drop_local_obsolete_slots(List *remote_slot_list)
Definition: slotsync.c:418
static void reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
Definition: slotsync.c:475
void ShutDownSlotSync(void)
Definition: slotsync.c:1565
static bool update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition: slotsync.c:546
bool sync_replication_slots
Definition: slotsync.c:107
static SlotSyncCtxStruct * SlotSyncCtx
Definition: slotsync.c:104
static void slotsync_failure_callback(int code, Datum arg)
Definition: slotsync.c:1691
#define SLOTSYNC_COLUMN_COUNT
static long sleep_ms
Definition: slotsync.c:117
#define SLOTSYNC_RESTART_INTERVAL_SEC
Definition: slotsync.c:120
static void reset_syncing_flag()
Definition: slotsync.c:1319
char * CheckAndGetDbnameFromConninfo(void)
Definition: slotsync.c:1016
static bool syncing_slots
Definition: slotsync.c:127
struct RemoteSlot RemoteSlot
struct SlotSyncCtxStruct SlotSyncCtxStruct
#define MAX_SLOTSYNC_WORKER_NAPTIME_MS
Definition: slotsync.c:115
static bool synchronize_slots(WalReceiverConn *wrconn)
Definition: slotsync.c:792
bool SlotSyncWorkerCanRestart(void)
Definition: slotsync.c:1633
static bool synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
Definition: slotsync.c:610
static void wait_for_slot_activity(bool some_slot_updated)
Definition: slotsync.c:1240
static void slotsync_reread_config(void)
Definition: slotsync.c:1110
void SlotSyncShmemInit(void)
Definition: slotsync.c:1671
static bool update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, bool *found_consistent_snapshot, bool *remote_slot_precedes)
Definition: slotsync.c:167
static void slotsync_worker_onexit(int code, Datum arg)
Definition: slotsync.c:1194
static void check_and_set_sync_info(pid_t worker_pid)
Definition: slotsync.c:1275
static void update_synced_slots_inactive_since(void)
Definition: slotsync.c:1515
bool ValidateSlotSyncParams(int elevel)
Definition: slotsync.c:1043
static void validate_remote_info(WalReceiverConn *wrconn)
Definition: slotsync.c:938
bool IsSyncingReplicationSlots(void)
Definition: slotsync.c:1653
void ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
Definition: slotsync.c:1335
Size SlotSyncShmemSize(void)
Definition: slotsync.c:1662
static void ProcessSlotSyncInterrupts(WalReceiverConn *wrconn)
Definition: slotsync.c:1159
bool SnapBuildSnapshotExists(XLogRecPtr lsn)
Definition: snapbuild.c:2050
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
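A minimal sketch combining ShmemInitStruct() (listed earlier) with the spinlock macros above, loosely following the shared-control-struct pattern used for slot synchronization; DemoCtlStruct and its fields are illustrative, not the real SlotSyncCtxStruct:

#include "postgres.h"
#include "storage/shmem.h"
#include "storage/spin.h"

typedef struct DemoCtlStruct
{
	pid_t		pid;
	bool		syncing;
	slock_t		mutex;
} DemoCtlStruct;

static DemoCtlStruct *DemoCtl = NULL;

static void
demo_shmem_init(void)
{
	bool		found;

	DemoCtl = (DemoCtlStruct *)
		ShmemInitStruct("Demo sync control", sizeof(DemoCtlStruct), &found);

	if (!found)
	{
		/* first attach: zero the struct and initialize its spinlock */
		memset(DemoCtl, 0, sizeof(DemoCtlStruct));
		SpinLockInit(&DemoCtl->mutex);
	}
}

static void
demo_clear_flag(void)
{
	/* short, straight-line critical section, as spinlocks require */
	SpinLockAcquire(&DemoCtl->mutex);
	DemoCtl->syncing = false;
	SpinLockRelease(&DemoCtl->mutex);
}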
void InitProcess(void)
Definition: proc.c:391
char * dbname
Definition: streamutil.c:49
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
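A minimal sketch of the StringInfo routines above, combined with quote_literal_cstr() from earlier in the index; the query text is abbreviated and not the exact statement slotsync.c builds:

#include "postgres.h"
#include "lib/stringinfo.h"
#include "utils/builtins.h"

static char *
build_query_example(const char *slot_name)
{
	StringInfoData s;

	initStringInfo(&s);
	appendStringInfoString(&s,
						   "SELECT slot_name, restart_lsn, confirmed_flush_lsn"
						   " FROM pg_catalog.pg_replication_slots");
	/* quote_literal_cstr() returns the value as a safely quoted SQL literal */
	appendStringInfo(&s, " WHERE slot_name = %s",
					 quote_literal_cstr(slot_name));

	return s.data;
}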
Definition: pg_list.h:54
bool two_phase
Definition: slotsync.c:138
char * plugin
Definition: slotsync.c:136
char * name
Definition: slotsync.c:135
char * database
Definition: slotsync.c:137
bool failover
Definition: slotsync.c:139
ReplicationSlotInvalidationCause invalidated
Definition: slotsync.c:146
XLogRecPtr confirmed_lsn
Definition: slotsync.c:141
XLogRecPtr restart_lsn
Definition: slotsync.c:140
XLogRecPtr two_phase_at
Definition: slotsync.c:142
TransactionId catalog_xmin
Definition: slotsync.c:143
ReplicationSlot replication_slots[1]
Definition: slot.h:232
TransactionId catalog_xmin
Definition: slot.h:97
XLogRecPtr confirmed_flush
Definition: slot.h:111
ReplicationSlotPersistency persistency
Definition: slot.h:81
ReplicationSlotInvalidationCause invalidated
Definition: slot.h:103
TransactionId effective_catalog_xmin
Definition: slot.h:182
slock_t mutex
Definition: slot.h:158
pid_t active_pid
Definition: slot.h:164
bool in_use
Definition: slot.h:161
ReplicationSlotPersistentData data
Definition: slot.h:185
time_t last_start_time
Definition: slotsync.c:100
Tuplestorestate * tuplestore
Definition: walreceiver.h:223
TupleDesc tupledesc
Definition: walreceiver.h:224
WalRcvExecStatus status
Definition: walreceiver.h:220
Definition: c.h:712
void InitializeTimeouts(void)
Definition: timeout.c:470
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
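A minimal sketch of the TransactionId helpers above, in the spirit of the catalog_xmin safety check this file performs before syncing a slot; the argument names are illustrative:

#include "postgres.h"
#include "access/transam.h"

static bool
catalog_xmin_is_safe(TransactionId remote_catalog_xmin,
					 TransactionId oldest_safe_xid)
{
	/* an invalid xmin imposes no requirement */
	if (!TransactionIdIsValid(remote_catalog_xmin))
		return true;

	/* unsafe if the remote xmin predates the oldest safe decoding xid */
	if (TransactionIdPrecedes(remote_catalog_xmin, oldest_safe_xid))
		return false;

	return true;
}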
bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot)
Definition: tuplestore.c:1130
static Datum slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull)
Definition: tuptable.h:399
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:458
#define WL_TIMEOUT
Definition: waiteventset.h:37
#define WL_EXIT_ON_PM_DEATH
Definition: waiteventset.h:39
#define WL_LATCH_SET
Definition: waiteventset.h:34
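A minimal sketch of a naptime wait built from the WL_* flags above, in the style of wait_for_slot_activity(); PG_WAIT_EXTENSION is only a placeholder wait-event class, not the one the worker actually reports:

#include "postgres.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/latch.h"

static void
nap_example(long naptime_ms)
{
	int			rc;

	rc = WaitLatch(MyLatch,
				   WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
				   naptime_ms,
				   PG_WAIT_EXTENSION);

	if (rc & WL_LATCH_SET)
	{
		ResetLatch(MyLatch);
		CHECK_FOR_INTERRUPTS();
	}
}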
static WalReceiverConn * wrconn
Definition: walreceiver.c:93
bool hot_standby_feedback
Definition: walreceiver.c:90
#define walrcv_connect(conninfo, replication, logical, must_use_password, appname, err)
Definition: walreceiver.h:435
@ WALRCV_OK_TUPLES
Definition: walreceiver.h:207
static void walrcv_clear_result(WalRcvExecResult *walres)
Definition: walreceiver.h:471
#define walrcv_get_dbname_from_conninfo(conninfo)
Definition: walreceiver.h:445
#define walrcv_exec(conn, exec, nRetTypes, retTypes)
Definition: walreceiver.h:465
#define walrcv_disconnect(conn)
Definition: walreceiver.h:467
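A minimal sketch of the walreceiver-API flow the macros above support: connect, run a query, and walk the returned tuplestore. The connection flags, the query, and the single LSN result column are simplifying assumptions, not the exact ones slotsync.c uses:

#include "postgres.h"
#include "catalog/pg_type.h"
#include "executor/tuptable.h"
#include "replication/walreceiver.h"
#include "utils/pg_lsn.h"

static void
fetch_remote_lsn_example(const char *conninfo)
{
	WalReceiverConn *conn;
	WalRcvExecResult *res;
	TupleTableSlot *tupslot;
	Oid			row_type[1] = {LSNOID};
	char	   *err = NULL;

	conn = walrcv_connect(conninfo, false, false, false,
						  "slotsync example", &err);
	if (conn == NULL)
		ereport(ERROR,
				errmsg("could not connect to the primary server: %s", err));

	res = walrcv_exec(conn, "SELECT pg_catalog.pg_current_wal_lsn()",
					  1, row_type);
	if (res->status != WALRCV_OK_TUPLES)
		ereport(ERROR,
				errmsg("could not fetch WAL position: %s", res->err));

	tupslot = MakeSingleTupleTableSlot(res->tupledesc, &TTSOpsMinimalTuple);
	if (tuplestore_gettupleslot(res->tuplestore, true, false, tupslot))
	{
		bool		isnull;
		XLogRecPtr	lsn = DatumGetLSN(slot_getattr(tupslot, 1, &isnull));

		if (!isnull)
			elog(LOG, "primary WAL position %X/%X", LSN_FORMAT_ARGS(lsn));

		ExecClearTuple(tupslot);
	}

	ExecDropSingleTupleTableSlot(tupslot);
	walrcv_clear_result(res);
	walrcv_disconnect(conn);
}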
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
XLogRecPtr GetStandbyFlushRecPtr(TimeLineID *tli)
Definition: walsender.c:3533
#define SIGCHLD
Definition: win32_port.h:168
#define SIGHUP
Definition: win32_port.h:158
#define SIGPIPE
Definition: win32_port.h:163
#define kill(pid, sig)
Definition: win32_port.h:493
#define SIGUSR1
Definition: win32_port.h:170
#define SIGUSR2
Definition: win32_port.h:171
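A minimal sketch of worker signal setup using the handlers referenced in this index; the exact set and order slotsync.c installs may differ, and SignalHandlerForConfigReload (from postmaster/interrupt.h) is an assumption for the SIGHUP slot. die and FloatExceptionHandler here are the backend handlers declared in tcop/tcopprot.h:

#include "postgres.h"
#include "libpq/pqsignal.h"
#include "postmaster/interrupt.h"
#include "storage/procsignal.h"
#include "tcop/tcopprot.h"

static void
setup_worker_signals_example(void)
{
	pqsignal(SIGHUP, SignalHandlerForConfigReload); /* assumed handler */
	pqsignal(SIGTERM, die);
	pqsignal(SIGFPE, FloatExceptionHandler);
	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
	pqsignal(SIGUSR2, SIG_IGN);
	pqsignal(SIGPIPE, SIG_IGN);
	pqsignal(SIGCHLD, SIG_DFL);
}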
bool IsTransactionState(void)
Definition: xact.c:387
void StartTransactionCommand(void)
Definition: xact.c:3059
void CommitTransactionCommand(void)
Definition: xact.c:3157
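A minimal sketch of the conditional transaction wrapper the xact.c functions above enable, useful because some callers may already be inside a transaction when the sync logic runs:

#include "postgres.h"
#include "access/xact.h"

static void
with_transaction_example(void)
{
	bool		started_tx = false;

	/* start a transaction only if the caller has not already done so */
	if (!IsTransactionState())
	{
		StartTransactionCommand();
		started_tx = true;
	}

	/* ... catalog access or remote queries go here ... */

	if (started_tx)
		CommitTransactionCommand();
}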
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3897
int wal_level
Definition: xlog.c:131
int wal_segment_size
Definition: xlog.c:143
XLogSegNo XLogGetOldestSegno(TimeLineID tli)
Definition: xlog.c:3913
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
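A minimal sketch of the WAL segment arithmetic above, clamping a restart LSN so it never points below the oldest segment still present on the given timeline; the timeline parameter and function name are illustrative:

#include "postgres.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"

static XLogRecPtr
clamp_restart_lsn_example(XLogRecPtr restart_lsn, TimeLineID tli)
{
	XLogSegNo	segno;
	XLogSegNo	oldest_segno;

	XLByteToSeg(restart_lsn, segno, wal_segment_size);
	oldest_segno = XLogGetOldestSegno(tli);

	if (segno < oldest_segno)
	{
		/* move the LSN forward to the start of the oldest existing segment */
		XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size, restart_lsn);
		elog(DEBUG1, "clamped restart_lsn to %X/%X",
			 LSN_FORMAT_ARGS(restart_lsn));
	}

	return restart_lsn;
}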
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:59
uint64 XLogSegNo
Definition: xlogdefs.h:48
char * PrimarySlotName
Definition: xlogrecovery.c:98
bool StandbyMode
Definition: xlogrecovery.c:148
char * PrimaryConnInfo
Definition: xlogrecovery.c:97