diff options
author | Tom Lane | 2010-07-03 20:43:58 +0000 |
---|---|---|
committer | Tom Lane | 2010-07-03 20:43:58 +0000 |
commit | e76c1a0f4d2127f11c72c02b3d73a5dcb4517173 (patch) | |
tree | 7a81ef438a2ef591dda9f8cf1c9e4eecfc5082e7 /src/include | |
parent | e6a7416e28bacef6311be20375c8498b23faeb65 (diff) |
Replace max_standby_delay with two parameters, max_standby_archive_delay and
max_standby_streaming_delay, and revise the implementation to avoid assuming
that timestamps found in WAL records can meaningfully be compared to clock
time on the standby server. Instead, the delay limits are compared to the
elapsed time since we last obtained a new WAL segment from archive or since
we were last "caught up" to WAL data arriving via streaming replication.
This avoids problems with clock skew between primary and standby, as well
as other corner cases that the original coding would misbehave in, such
as the primary server having significant idle time between transactions.
Per my complaint some time ago and considerable ensuing discussion.
Do some desultory editing on the hot standby documentation, too.
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/access/xlog.h | 21 | ||||
-rw-r--r-- | src/include/replication/walreceiver.h | 34 | ||||
-rw-r--r-- | src/include/storage/standby.h | 7 |
3 files changed, 39 insertions, 23 deletions
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index cbadd7f91fb..27e7f404d8d 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.113 2010/06/17 16:41:25 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.114 2010/07/03 20:43:58 tgl Exp $ */ #ifndef XLOG_H #define XLOG_H @@ -135,22 +135,25 @@ typedef struct XLogRecData extern PGDLLIMPORT TimeLineID ThisTimeLineID; /* current TLI */ /* - * Prior to 8.4, all activity during recovery was carried out by Startup + * Prior to 8.4, all activity during recovery was carried out by the startup * process. This local variable continues to be used in many parts of the - * code to indicate actions taken by RecoveryManagers. Other processes who - * potentially perform work during recovery should check RecoveryInProgress() - * see XLogCtl notes in xlog.c + * code to indicate actions taken by RecoveryManagers. Other processes that + * potentially perform work during recovery should check RecoveryInProgress(). + * See XLogCtl notes in xlog.c. */ extern bool InRecovery; /* * Like InRecovery, standbyState is only valid in the startup process. + * In all other processes it will have the value STANDBY_DISABLED (so + * InHotStandby will read as FALSE). * * In DISABLED state, we're performing crash recovery or hot standby was * disabled in recovery.conf. * - * In INITIALIZED state, we haven't yet received a RUNNING_XACTS or shutdown - * checkpoint record to initialize our master transaction tracking system. + * In INITIALIZED state, we've run InitRecoveryTransactionEnvironment, but + * we haven't yet processed a RUNNING_XACTS or shutdown-checkpoint WAL record + * to initialize our master-transaction tracking system. * * When the transaction tracking is initialized, we enter the SNAPSHOT_PENDING * state. The tracked information might still be incomplete, so we can't allow @@ -168,6 +171,7 @@ typedef enum STANDBY_SNAPSHOT_PENDING, STANDBY_SNAPSHOT_READY } HotStandbyState; + extern HotStandbyState standbyState; #define InHotStandby (standbyState >= STANDBY_SNAPSHOT_PENDING) @@ -193,7 +197,6 @@ extern int XLogArchiveTimeout; extern bool XLogArchiveMode; extern char *XLogArchiveCommand; extern bool EnableHotStandby; -extern int MaxStandbyDelay; extern bool log_checkpoints; /* WAL levels */ @@ -279,7 +282,7 @@ extern void issue_xlog_fsync(int fd, uint32 log, uint32 seg); extern bool RecoveryInProgress(void); extern bool XLogInsertAllowed(void); -extern TimestampTz GetLatestXLogTime(void); +extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream); extern void UpdateControlFile(void); extern uint64 GetSystemIdentifier(void); diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h index 5dcaeba3f33..734380ee4f8 100644 --- a/src/include/replication/walreceiver.h +++ b/src/include/replication/walreceiver.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 2010-2010, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/replication/walreceiver.h,v 1.9 2010/06/03 22:17:32 tgl Exp $ + * $PostgreSQL: pgsql/src/include/replication/walreceiver.h,v 1.10 2010/07/03 20:43:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -41,25 +41,35 @@ typedef enum typedef struct { /* - * connection string; is used for walreceiver to connect with the primary. - */ - char conninfo[MAXCONNINFO]; - - /* - * PID of currently active walreceiver process, and the current state. + * PID of currently active walreceiver process, its current state and + * start time (actually, the time at which it was requested to be started). */ pid_t pid; WalRcvState walRcvState; pg_time_t startTime; /* - * receivedUpto-1 is the last byte position that has been already - * received. When startup process starts the walreceiver, it sets this to - * the point where it wants the streaming to begin. After that, - * walreceiver updates this whenever it flushes the received WAL. + * receivedUpto-1 is the last byte position that has already been + * received. When startup process starts the walreceiver, it sets + * receivedUpto to the point where it wants the streaming to begin. + * After that, walreceiver updates this whenever it flushes the received + * WAL to disk. */ XLogRecPtr receivedUpto; + /* + * latestChunkStart is the starting byte position of the current "batch" + * of received WAL. It's actually the same as the previous value of + * receivedUpto before the last flush to disk. Startup process can use + * this to detect whether it's keeping up or not. + */ + XLogRecPtr latestChunkStart; + + /* + * connection string; is used for walreceiver to connect with the primary. + */ + char conninfo[MAXCONNINFO]; + slock_t mutex; /* locks shared variables shown above */ } WalRcvData; @@ -83,6 +93,6 @@ extern void ShutdownWalRcv(void); extern bool WalRcvInProgress(void); extern XLogRecPtr WaitNextXLogAvailable(XLogRecPtr recptr, bool *finished); extern void RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo); -extern XLogRecPtr GetWalRcvWriteRecPtr(void); +extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart); #endif /* _WALRECEIVER_H */ diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 9159301a168..0654c5bcccf 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.10 2010/05/13 11:15:38 sriggs Exp $ + * $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.11 2010/07/03 20:43:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,7 +19,10 @@ #include "storage/procsignal.h" #include "storage/relfilenode.h" +/* User-settable GUC parameters */ extern int vacuum_defer_cleanup_age; +extern int max_standby_archive_delay; +extern int max_standby_streaming_delay; extern void InitRecoveryTransactionEnvironment(void); extern void ShutdownRecoveryTransactionEnvironment(void); @@ -83,7 +86,7 @@ extern void standby_desc(StringInfo buf, uint8 xl_info, char *rec); /* * Declarations for GetRunningTransactionData(). Similar to Snapshots, but * not quite. This has nothing at all to do with visibility on this server, - * so this is completely separate from snapmgr.c and snapmgr.h + * so this is completely separate from snapmgr.c and snapmgr.h. * This data is important for creating the initial snapshot state on a * standby server. We need lots more information than a normal snapshot, * hence we use a specific data structure for our needs. This data |