summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
author Andres Freund 2017-06-06 01:53:41 +0000
committer Andres Freund 2017-06-06 02:18:15 +0000
commit c6c333436491a292d56044ed6e167e2bdee015a2 (patch)
tree b481f8fb4845910e9adce27a7a8b4896ed22a035 /src/include
parent 47fd420fb4d3e77dde960312f8672c82b14ecbad (diff)
Prevent possibility of panics during shutdown checkpoint.
When the checkpointer writes the shutdown checkpoint, it checks afterwards whether any WAL has been written since it started and throws a PANIC if so. At that point, only walsenders are still active, so one might think this could not happen, but walsenders can also generate WAL, for instance in BASE_BACKUP and logical decoding related commands (e.g. via hint bits). So they can trigger this panic if such a command is run while the shutdown checkpoint is being written. To fix this, divide the walsender shutdown into two phases. First, the checkpointer, itself triggered by the postmaster, sends a PROCSIG_WALSND_INIT_STOPPING signal to all walsenders. If the backend is idle or runs an SQL query, this causes the backend to shut down; if logical replication is in progress, all existing WAL records are processed, followed by a shutdown. Otherwise, this causes the walsender to switch to the "stopping" state. In this state, the walsender will reject any further replication commands. The checkpointer begins the shutdown checkpoint once all walsenders are confirmed as stopping. When the shutdown checkpoint finishes, the postmaster sends the walsenders SIGUSR2. This instructs each walsender to send any outstanding WAL, including the shutdown checkpoint record, wait for it to be replicated to the standby, and then exit. Author: Andres Freund, based on an earlier patch by Michael Paquier Reported-By: Fujii Masao, Andres Freund Reviewed-By: Michael Paquier Discussion: https://postgr.es/m/20170602002912.tqlwn4gymzlxpvs2@alap3.anarazel.de Backpatch: 9.4, where logical decoding was introduced
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/replication/walsender.h 3
-rw-r--r-- src/include/replication/walsender_private.h 3
-rw-r--r-- src/include/storage/procsignal.h 1
3 files changed, 6 insertions, 1 deletions
diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h
index 2ca903872e4..c50e450ec2a 100644
--- a/src/include/replication/walsender.h
+++ b/src/include/replication/walsender.h
@@ -44,6 +44,9 @@ extern void WalSndSignals(void);
extern Size WalSndShmemSize(void);
extern void WalSndShmemInit(void);
extern void WalSndWakeup(void);
+extern void WalSndInitStopping(void);
+extern void WalSndWaitStopping(void);
+extern void HandleWalSndInitStopping(void);
extern void WalSndRqstFileReload(void);
/*
diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h
index 2c59056cefd..36311e124c4 100644
--- a/src/include/replication/walsender_private.h
+++ b/src/include/replication/walsender_private.h
@@ -24,7 +24,8 @@ typedef enum WalSndState
WALSNDSTATE_STARTUP = 0,
WALSNDSTATE_BACKUP,
WALSNDSTATE_CATCHUP,
- WALSNDSTATE_STREAMING
+ WALSNDSTATE_STREAMING,
+ WALSNDSTATE_STOPPING
} WalSndState;
/*
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index d068dde5d76..553f0f43f7e 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -32,6 +32,7 @@ typedef enum
PROCSIG_CATCHUP_INTERRUPT, /* sinval catchup interrupt */
PROCSIG_NOTIFY_INTERRUPT, /* listen/notify interrupt */
PROCSIG_PARALLEL_MESSAGE, /* message from cooperating parallel backend */
+ PROCSIG_WALSND_INIT_STOPPING, /* ask walsenders to prepare for shutdown */
/* Recovery conflict reasons */
PROCSIG_RECOVERY_CONFLICT_DATABASE,