Fix concurrent update trigger issues with MERGE in a CTE.
author: Dean Rasheed <dean.a.rasheed@gmail.com>
Fri, 18 Jul 2025 08:55:43 +0000 (09:55 +0100)
committer: Dean Rasheed <dean.a.rasheed@gmail.com>
Fri, 18 Jul 2025 08:55:43 +0000 (09:55 +0100)
If a MERGE inside a CTE attempts an UPDATE or DELETE on a table with
BEFORE ROW triggers, and a concurrent UPDATE or DELETE happens, the
merge code would fail (crashing in the case of an UPDATE action, and
potentially executing the wrong action for a DELETE action).

This is the same issue that 9321c79c86 attempted to fix, except now
for a MERGE inside a CTE. As noted in 9321c79c86, what needs to happen
is for the trigger code to exit early, returning the TM_Result and
TM_FailureData information to the merge code, if a concurrent
modification is detected, rather than attempting to do an EPQ
recheck. The merge code will then do its own rechecking, and rescan
the action list, potentially executing a different action in light of
the concurrent update. In particular, the trigger code must never call
ExecGetUpdateNewTuple() for MERGE, since that is bound to fail because
MERGE has its own per-action projection information.

Commit 9321c79c86 did this using estate->es_plannedstmt->commandType
in the trigger code to detect that a MERGE was being executed, which
is fine for a plain MERGE command, but does not work for a MERGE
inside a CTE. Fix by passing that information to the trigger code as
an additional parameter passed to ExecBRUpdateTriggers() and
ExecBRDeleteTriggers().

Back-patch as far as v17 only, since MERGE cannot appear inside a CTE
prior to that. Additionally, take care to preserve the trigger ABI in
v17 (though not in v18, which is still in beta).

Bug: #18986
Reported-by: Yaroslav Syrytsia <me@ys.lc>
Author: Dean Rasheed <dean.a.rasheed@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/18986-e7a8aac3d339fa47@postgresql.org
Backpatch-through: 17

src/backend/commands/trigger.c
src/backend/executor/execReplication.c
src/backend/executor/nodeModifyTable.c
src/include/commands/trigger.h
src/test/isolation/expected/merge-match-recheck.out
src/test/isolation/specs/merge-match-recheck.spec

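diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 67f8e70f9c1666f76ca3f3d38a80403b4e714b38..7dc121f73f17e355ec6be59cc90935b807d6cb47 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c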
@@ -80,6 +80,7 @@ static bool GetTupleForTrigger(EState *estate,
                               ItemPointer tid,
                               LockTupleMode lockmode,
                               TupleTableSlot *oldslot,
+                              bool do_epq_recheck,
                               TupleTableSlot **epqslot,
                               TM_Result *tmresultp,
                               TM_FailureData *tmfdp);
@@ -2693,7 +2694,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
                     HeapTuple fdw_trigtuple,
                     TupleTableSlot **epqslot,
                     TM_Result *tmresult,
-                    TM_FailureData *tmfd)
+                    TM_FailureData *tmfd,
+                    bool is_merge_delete)
 {
    TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
@@ -2708,9 +2710,17 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
    {
        TupleTableSlot *epqslot_candidate = NULL;
 
+       /*
+        * Get a copy of the on-disk tuple we are planning to delete.  In
+        * general, if the tuple has been concurrently updated, we should
+        * recheck it using EPQ.  However, if this is a MERGE DELETE action,
+        * we skip this EPQ recheck and leave it to the caller (it must do
+        * additional rechecking, and might end up executing a different
+        * action entirely).
+        */
        if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
-                               LockTupleExclusive, slot, &epqslot_candidate,
-                               tmresult, tmfd))
+                               LockTupleExclusive, slot, !is_merge_delete,
+                               &epqslot_candidate, tmresult, tmfd))
            return false;
 
        /*
@@ -2800,6 +2810,7 @@ ExecARDeleteTriggers(EState *estate,
                               tupleid,
                               LockTupleExclusive,
                               slot,
+                              false,
                               NULL,
                               NULL,
                               NULL);
@@ -2944,7 +2955,8 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
                     HeapTuple fdw_trigtuple,
                     TupleTableSlot *newslot,
                     TM_Result *tmresult,
-                    TM_FailureData *tmfd)
+                    TM_FailureData *tmfd,
+                    bool is_merge_update)
 {
    TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
    TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo);
@@ -2965,10 +2977,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
    {
        TupleTableSlot *epqslot_candidate = NULL;
 
-       /* get a copy of the on-disk tuple we are planning to update */
+       /*
+        * Get a copy of the on-disk tuple we are planning to update.  In
+        * general, if the tuple has been concurrently updated, we should
+        * recheck it using EPQ.  However, if this is a MERGE UPDATE action,
+        * we skip this EPQ recheck and leave it to the caller (it must do
+        * additional rechecking, and might end up executing a different
+        * action entirely).
+        */
        if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
-                               lockmode, oldslot, &epqslot_candidate,
-                               tmresult, tmfd))
+                               lockmode, oldslot, !is_merge_update,
+                               &epqslot_candidate, tmresult, tmfd))
            return false;       /* cancel the update action */
 
        /*
@@ -3142,6 +3161,7 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
                               tupleid,
                               LockTupleExclusive,
                               oldslot,
+                              false,
                               NULL,
                               NULL,
                               NULL);
@@ -3298,6 +3318,7 @@ GetTupleForTrigger(EState *estate,
                   ItemPointer tid,
                   LockTupleMode lockmode,
                   TupleTableSlot *oldslot,
+                  bool do_epq_recheck,
                   TupleTableSlot **epqslot,
                   TM_Result *tmresultp,
                   TM_FailureData *tmfdp)
@@ -3357,29 +3378,30 @@ GetTupleForTrigger(EState *estate,
                if (tmfd.traversed)
                {
                    /*
-                    * Recheck the tuple using EPQ. For MERGE, we leave this
-                    * to the caller (it must do additional rechecking, and
-                    * might end up executing a different action entirely).
+                    * Recheck the tuple using EPQ, if requested.  Otherwise,
+                    * just return that it was concurrently updated.
                     */
-                   if (estate->es_plannedstmt->commandType == CMD_MERGE)
+                   if (do_epq_recheck)
                    {
-                       if (tmresultp)
-                           *tmresultp = TM_Updated;
-                       return false;
+                       *epqslot = EvalPlanQual(epqstate,
+                                               relation,
+                                               relinfo->ri_RangeTableIndex,
+                                               oldslot);
+
+                       /*
+                        * If PlanQual failed for updated tuple - we must not
+                        * process this tuple!
+                        */
+                       if (TupIsNull(*epqslot))
+                       {
+                           *epqslot = NULL;
+                           return false;
+                       }
                    }
-
-                   *epqslot = EvalPlanQual(epqstate,
-                                           relation,
-                                           relinfo->ri_RangeTableIndex,
-                                           oldslot);
-
-                   /*
-                    * If PlanQual failed for updated tuple - we must not
-                    * process this tuple!
-                    */
-                   if (TupIsNull(*epqslot))
+                   else
                    {
-                       *epqslot = NULL;
+                       if (tmresultp)
+                           *tmresultp = TM_Updated;
                        return false;
                    }
                }
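diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 53ddd25c42db9dc6d1962d081b2eca8e99b7046f..f262e7a66f771da13df52d234c2ebe2928767236 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c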
@@ -670,7 +670,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
        resultRelInfo->ri_TrigDesc->trig_update_before_row)
    {
        if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
-                                 tid, NULL, slot, NULL, NULL))
+                                 tid, NULL, slot, NULL, NULL, false))
            skip_tuple = true;  /* "do nothing" */
    }
 
@@ -746,7 +746,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
        resultRelInfo->ri_TrigDesc->trig_delete_before_row)
    {
        skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
-                                          tid, NULL, NULL, NULL, NULL);
+                                          tid, NULL, NULL, NULL, NULL, false);
    }
 
    if (!skip_tuple)
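diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 54da8e7995bd34b2705010769d6bae98f09be822..7c6c2c1f6e42ac6cd3c5ad17905f658003c5fa83 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c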
@@ -1474,7 +1474,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 
        return ExecBRDeleteTriggers(context->estate, context->epqstate,
                                    resultRelInfo, tupleid, oldtuple,
-                                   epqreturnslot, result, &context->tmfd);
+                                   epqreturnslot, result, &context->tmfd,
+                                   context->mtstate->operation == CMD_MERGE);
    }
 
    return true;
@@ -2117,7 +2118,8 @@ ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 
        return ExecBRUpdateTriggers(context->estate, context->epqstate,
                                    resultRelInfo, tupleid, oldtuple, slot,
-                                   result, &context->tmfd);
+                                   result, &context->tmfd,
+                                   context->mtstate->operation == CMD_MERGE);
    }
 
    return true;
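diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h
index 2ed2c4bb3784bfb894e40b4bbaec6ff795f30852..cfd7daa20edacfafe18f38791336b786acaa7d86 100644
--- a/src/include/commands/trigger.h
+++ b/src/include/commands/trigger.h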
@@ -213,7 +213,8 @@ extern bool ExecBRDeleteTriggers(EState *estate,
                                 HeapTuple fdw_trigtuple,
                                 TupleTableSlot **epqslot,
                                 TM_Result *tmresult,
-                                TM_FailureData *tmfd);
+                                TM_FailureData *tmfd,
+                                bool is_merge_delete);
 extern void ExecARDeleteTriggers(EState *estate,
                                 ResultRelInfo *relinfo,
                                 ItemPointer tupleid,
@@ -235,7 +236,8 @@ extern bool ExecBRUpdateTriggers(EState *estate,
                                 HeapTuple fdw_trigtuple,
                                 TupleTableSlot *newslot,
                                 TM_Result *tmresult,
-                                TM_FailureData *tmfd);
+                                TM_FailureData *tmfd,
+                                bool is_merge_update);
 extern void ExecARUpdateTriggers(EState *estate,
                                 ResultRelInfo *relinfo,
                                 ResultRelInfo *src_partinfo,
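diff --git a/src/test/isolation/expected/merge-match-recheck.out b/src/test/isolation/expected/merge-match-recheck.out
index 9a44a5959270b796aa4b1661ab06ee833c695cc9..90300f1db5ab38789120835803c31ce4e6058395 100644
--- a/src/test/isolation/expected/merge-match-recheck.out
+++ b/src/test/isolation/expected/merge-match-recheck.out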
@@ -241,19 +241,28 @@ starting permutation: update_bal1_tg merge_bal_tg c2 select1_tg c1
 s2: NOTICE:  Update: (1,160,s1,setup) -> (1,50,s1,"setup updated by update_bal1_tg")
 step update_bal1_tg: UPDATE target_tg t SET balance = 50, val = t.val || ' updated by update_bal1_tg' WHERE t.key = 1;
 step merge_bal_tg: 
-  MERGE INTO target_tg t
-  USING (SELECT 1 as key) s
-  ON s.key = t.key
-  WHEN MATCHED AND balance < 100 THEN
-   UPDATE SET balance = balance * 2, val = t.val || ' when1'
-  WHEN MATCHED AND balance < 200 THEN
-   UPDATE SET balance = balance * 4, val = t.val || ' when2'
-  WHEN MATCHED AND balance < 300 THEN
-   UPDATE SET balance = balance * 8, val = t.val || ' when3';
+  WITH t AS (
+    MERGE INTO target_tg t
+    USING (SELECT 1 as key) s
+    ON s.key = t.key
+    WHEN MATCHED AND balance < 100 THEN
+      UPDATE SET balance = balance * 2, val = t.val || ' when1'
+    WHEN MATCHED AND balance < 200 THEN
+      UPDATE SET balance = balance * 4, val = t.val || ' when2'
+    WHEN MATCHED AND balance < 300 THEN
+      UPDATE SET balance = balance * 8, val = t.val || ' when3'
+    RETURNING t.*
+  )
+  SELECT * FROM t;
  <waiting ...>
 step c2: COMMIT;
 s1: NOTICE:  Update: (1,50,s1,"setup updated by update_bal1_tg") -> (1,100,s1,"setup updated by update_bal1_tg when1")
 step merge_bal_tg: <... completed>
+key|balance|status|val                                  
+---+-------+------+-------------------------------------
+  1|    100|s1    |setup updated by update_bal1_tg when1
+(1 row)
+
 step select1_tg: SELECT * FROM target_tg;
 key|balance|status|val                                  
 ---+-------+------+-------------------------------------
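diff --git a/src/test/isolation/specs/merge-match-recheck.spec b/src/test/isolation/specs/merge-match-recheck.spec
index 26266b8c2978e02f7dbb8c7a7d38e6830c18b00d..15226e40c9efc64950c5306727f9536491e58a56 100644
--- a/src/test/isolation/specs/merge-match-recheck.spec
+++ b/src/test/isolation/specs/merge-match-recheck.spec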
@@ -99,15 +99,19 @@ step "merge_bal_pa"
 }
 step "merge_bal_tg"
 {
-  MERGE INTO target_tg t
-  USING (SELECT 1 as key) s
-  ON s.key = t.key
-  WHEN MATCHED AND balance < 100 THEN
-   UPDATE SET balance = balance * 2, val = t.val || ' when1'
-  WHEN MATCHED AND balance < 200 THEN
-   UPDATE SET balance = balance * 4, val = t.val || ' when2'
-  WHEN MATCHED AND balance < 300 THEN
-   UPDATE SET balance = balance * 8, val = t.val || ' when3';
+  WITH t AS (
+    MERGE INTO target_tg t
+    USING (SELECT 1 as key) s
+    ON s.key = t.key
+    WHEN MATCHED AND balance < 100 THEN
+      UPDATE SET balance = balance * 2, val = t.val || ' when1'
+    WHEN MATCHED AND balance < 200 THEN
+      UPDATE SET balance = balance * 4, val = t.val || ' when2'
+    WHEN MATCHED AND balance < 300 THEN
+      UPDATE SET balance = balance * 8, val = t.val || ' when3'
+    RETURNING t.*
+  )
+  SELECT * FROM t;
 }
 
 step "merge_delete"