Check slot->restart_lsn validity in a few more places
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 29 Apr 2020 00:39:04 +0000 (20:39 -0400)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 29 Apr 2020 00:39:04 +0000 (20:39 -0400)
Lack of these checks could cause visible misbehavior, including
assertion failures.  This was missed in commit c6550776394e, whereby
restart_lsn becomes invalid when the size limit is exceeded.

Also reword some existing error messages, and add errdetail(), so that
the reported errors all match in spirit.

Author: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Discussion: https://postgr.es/m/20200408.093710.447591748588426656.horikyota.ntt@gmail.com

contrib/test_decoding/expected/slot.out
src/backend/replication/logical/logicalfuncs.c
src/backend/replication/slotfuncs.c
src/backend/replication/walsender.c

index 1000171530f462f2628dc8bdd00a489a6360f8c8..ea72bf9f1573c64e3a5be03aa6c0daf9cad6a40d 100644 (file)
@@ -143,7 +143,8 @@ SELECT slot_name FROM pg_create_physical_replication_slot('regression_slot3');
 SELECT pg_replication_slot_advance('regression_slot3', '0/0'); -- invalid LSN
 ERROR:  invalid target WAL LSN
 SELECT pg_replication_slot_advance('regression_slot3', '0/1'); -- error
-ERROR:  cannot advance replication slot that has not previously reserved WAL
+ERROR:  replication slot "regression_slot3" cannot be advanced
+DETAIL:  This slot has never previously reserved WAL, or has been invalidated.
 SELECT pg_drop_replication_slot('regression_slot3');
  pg_drop_replication_slot 
 --------------------------
index f5384f1df8c9f639a6be51c8bfb3434b6e95a9d4..fded8e82908b9aa19e5d9243007d03da9fb54711 100644 (file)
@@ -237,6 +237,19 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin
                                    LogicalOutputPrepareWrite,
                                    LogicalOutputWrite, NULL);
 
+       /*
+        * After the sanity checks in CreateDecodingContext, make sure the
+        * restart_lsn is valid.  Avoid "cannot get changes" wording in this
+        * errmsg because that'd be confusingly ambiguous about no changes
+        * being available.
+        */
+       if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn))
+           ereport(ERROR,
+                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                    errmsg("can no longer get changes from replication slot \"%s\"",
+                           NameStr(*name)),
+                    errdetail("This slot has never previously reserved WAL, or has been invalidated.")));
+
        MemoryContextSwitchTo(oldcontext);
 
        /*
index f776de3df7ac87867b3a7ddc8445d60f21e05e8b..ae751e94e765f2333ddf9a40675f13c9661d12c5 100644 (file)
@@ -598,7 +598,9 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS)
    if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn))
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                errmsg("cannot advance replication slot that has not previously reserved WAL")));
+                errmsg("replication slot \"%s\" cannot be advanced",
+                       NameStr(*slotname)),
+                errdetail("This slot has never previously reserved WAL, or has been invalidated.")));
 
    /*
     * Check if the slot is not moving backwards.  Physical slots rely simply
index 6c87f6087ced287652436d564a5c69480ef06628..8b55bbfcb2ec31b2859bf03f87d5bbc9b19c0412 100644 (file)
@@ -600,6 +600,12 @@ StartReplication(StartReplicationCmd *cmd)
            ereport(ERROR,
                    (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                     errmsg("cannot use a logical replication slot for physical replication")));
+
+       /*
+        * We don't need to verify the slot's restart_lsn here; instead we
+        * rely on the caller requesting the starting point to use.  If the
+        * WAL segment doesn't exist, we'll fail later.
+        */
    }
 
    /*
@@ -1134,6 +1140,13 @@ StartLogicalReplication(StartReplicationCmd *cmd)
 
    (void) ReplicationSlotAcquire(cmd->slotname, SAB_Error);
 
+   if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn))
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("cannot read from logical replication slot \"%s\"",
+                       cmd->slotname),
+                errdetail("This slot has been invalidated because it exceeded the maximum reserved size.")));
+
    /*
     * Force a disconnect, so that the decoding code doesn't need to care
     * about an eventual switch from running in recovery, to running in a
@@ -1159,7 +1172,6 @@ StartLogicalReplication(StartReplicationCmd *cmd)
                              WalSndPrepareWrite, WalSndWriteData,
                              WalSndUpdateProgress);
 
-
    WalSndSetState(WALSNDSTATE_CATCHUP);
 
    /* Send a CopyBothResponse message, and start streaming */