summaryrefslogtreecommitdiff
path: root/src/backend/commands
diff options
context:
space:
mode:
author: Amit Kapila, 2022-09-12 07:10:57 +0000
committer: Amit Kapila, 2022-09-12 07:10:57 +0000
commit: 88f488319bac051b874bcec87941217e25e0e126 (patch)
tree: 871c8a0c958185f77e3ff91c5ddb271e9b5a6763 /src/backend/commands
parent: 5015e1e1b58f81a036e4ad16291ef4b3bb7a596c (diff)
Make the tablesync worker's replication origin drop logic robust.
In commit f6c5edb8ab, we started to drop the replication origin slots before the tablesync worker exits, to avoid consuming more slots than required. We were dropping the replication origin in the same transaction in which we marked the tablesync state as SYNCDONE. Now, if any error occurs after we have dropped the origin but before we commit the containing transaction, the in-memory state of replication progress won't be rolled back. Due to this, after the restart, the tablesync worker can start streaming from the wrong location and can apply an already processed transaction. To fix this, we need to opportunistically drop the origin after marking the tablesync state as SYNCDONE. Even if the tablesync worker fails to remove the replication origin before exiting, the apply worker ensures that it is cleaned up afterward. Reported by Tom Lane as per buildfarm. Diagnosed-by: Masahiko Sawada Author: Hou Zhijie Reviewed-By: Masahiko Sawada, Amit Kapila Discussion: https://postgr.es/m/20220714115155.GA5439@depesz.com Discussion: https://postgr.es/m/CAD21AoAw0Oofi4kiDpJBOwpYyBBBkJj=sLUOn4Gd2GjUAKG-fw@mail.gmail.com
Diffstat (limited to 'src/backend/commands')
-rw-r--r--src/backend/commands/subscriptioncmds.c25
1 files changed, 11 insertions, 14 deletions
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 66d800f0cff..1024d51dca8 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -931,10 +931,10 @@ AlterSubscription_refresh(Subscription *sub, bool copy_data,
logicalrep_worker_stop(sub->oid, relid);
/*
- * For READY state and SYNCDONE state, we would have already
- * dropped the tablesync origin.
+ * For READY state, we would have already dropped the
+ * tablesync origin.
*/
- if (state != SUBREL_STATE_READY && state != SUBREL_STATE_SYNCDONE)
+ if (state != SUBREL_STATE_READY)
{
char originname[NAMEDATALEN];
@@ -942,8 +942,11 @@ AlterSubscription_refresh(Subscription *sub, bool copy_data,
* Drop the tablesync's origin tracking if exists.
*
* It is possible that the origin is not yet created for
- * tablesync worker so passing missing_ok = true. This can
- * happen for the states before SUBREL_STATE_FINISHEDCOPY.
+ * tablesync worker, this can happen for the states before
+ * SUBREL_STATE_FINISHEDCOPY. The tablesync worker or
+ * apply worker can also concurrently try to drop the
+ * origin and by this time the origin might be already
+ * removed. For these reasons, passing missing_ok = true.
*/
ReplicationOriginNameForTablesync(sub->oid, relid, originname,
sizeof(originname));
@@ -1516,19 +1519,13 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel)
/*
* Drop the tablesync's origin tracking if exists.
*
- * For SYNCDONE/READY states, the tablesync origin tracking is known
- * to have already been dropped by the tablesync worker.
- *
* It is possible that the origin is not yet created for tablesync
* worker so passing missing_ok = true. This can happen for the states
* before SUBREL_STATE_FINISHEDCOPY.
*/
- if (rstate->state != SUBREL_STATE_SYNCDONE)
- {
- ReplicationOriginNameForTablesync(subid, relid, originname,
- sizeof(originname));
- replorigin_drop_by_name(originname, true, false);
- }
+ ReplicationOriginNameForTablesync(subid, relid, originname,
+ sizeof(originname));
+ replorigin_drop_by_name(originname, true, false);
}
/* Clean up dependencies */