Fix decoding of speculative aborts.
author: Amit Kapila <akapila@postgresql.org>
Tue, 15 Jun 2021 02:58:36 +0000 (08:28 +0530)
committer: Amit Kapila <akapila@postgresql.org>
Tue, 15 Jun 2021 02:58:36 +0000 (08:28 +0530)
During decoding of speculative inserts, we were relying on confirmation
records or subsequent change records to clean up the toast hash. But that
could lead to multiple problems: (a) a memory leak if there is neither a
confirmation record nor any other record after the toast insertion for a
speculative insert in the transaction, and (b) errors and assertion
failures if the next operation is not an insert/update on the same table.

The fix is to start queuing a spec abort change and, while processing it,
to clean up the toast hash and the pending speculative insert's change
record. Currently, we queue spec aborts for both the toast and the main
table, even though we perform the cleanup while processing the main
table's spec abort record. Later, if we have a way to distinguish between
the spec abort records of the toast and the main table, we can avoid
queuing the change for spec aborts of toast tables.

Reported-by: Ashutosh Bapat
Author: Dilip Kumar
Reviewed-by: Amit Kapila
Backpatch-through: 9.6, where it was introduced
Discussion: https://postgr.es/m/CAExHW5sPKF-Oovx_qZe4p5oM6Dvof7_P+XgsNAViug15Fm99jA@mail.gmail.com

src/backend/replication/logical/decode.c
src/backend/replication/logical/reorderbuffer.c
src/include/replication/reorderbuffer.h

index 70670169acc25f841970d7bd741425c375846954..453efc51e1625e939c9b4b24757fe412ed754fe2 100644 (file)
@@ -1040,19 +1040,17 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
    if (target_node.dbNode != ctx->slot->data.database)
        return;
 
-   /*
-    * Super deletions are irrelevant for logical decoding, it's driven by the
-    * confirmation records.
-    */
-   if (xlrec->flags & XLH_DELETE_IS_SUPER)
-       return;
-
    /* output plugin doesn't look for this origin, no need to queue */
    if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
        return;
 
    change = ReorderBufferGetChange(ctx->reorder);
-   change->action = REORDER_BUFFER_CHANGE_DELETE;
+
+   if (xlrec->flags & XLH_DELETE_IS_SUPER)
+       change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT;
+   else
+       change->action = REORDER_BUFFER_CHANGE_DELETE;
+
    change->origin_id = XLogRecGetOrigin(r);
 
    memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
index f96029f15a45ea9b0335943e6ba8f686051af7b8..19e96f3fd94cecbfb24998d4bf6b9c20f5a6383d 100644 (file)
@@ -443,6 +443,9 @@ ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
        txn->invalidations = NULL;
    }
 
+   /* Reset the toast hash */
+   ReorderBufferToastReset(rb, txn);
+
    pfree(txn);
 }
 
@@ -520,6 +523,7 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change,
            }
            break;
        case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
+       case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
        case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
        case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
            break;
@@ -2211,8 +2215,8 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
            change_done:
 
                    /*
-                    * Either speculative insertion was confirmed, or it was
-                    * unsuccessful and the record isn't needed anymore.
+                    * If speculative insertion was confirmed, the record isn't
+                    * needed anymore.
                     */
                    if (specinsert != NULL)
                    {
@@ -2254,6 +2258,32 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
                    specinsert = change;
                    break;
 
+               case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
+
+                   /*
+                    * Abort for speculative insertion arrived. So cleanup the
+                    * specinsert tuple and toast hash.
+                    *
+                    * Note that we get the spec abort change for each toast
+                    * entry but we need to perform the cleanup only the first
+                    * time we get it for the main table.
+                    */
+                   if (specinsert != NULL)
+                   {
+                       /*
+                        * We must clean the toast hash before processing a
+                        * completely new tuple to avoid confusion about the
+                        * previous tuple's toast chunks.
+                        */
+                       Assert(change->data.tp.clear_toast_afterwards);
+                       ReorderBufferToastReset(rb, txn);
+
+                       /* We don't need this record anymore. */
+                       ReorderBufferReturnChange(rb, specinsert, true);
+                       specinsert = NULL;
+                   }
+                   break;
+
                case REORDER_BUFFER_CHANGE_TRUNCATE:
                    {
                        int         i;
@@ -2360,16 +2390,8 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
            }
        }
 
-       /*
-        * There's a speculative insertion remaining, just clean in up, it
-        * can't have been successful, otherwise we'd gotten a confirmation
-        * record.
-        */
-       if (specinsert)
-       {
-           ReorderBufferReturnChange(rb, specinsert, true);
-           specinsert = NULL;
-       }
+       /* speculative insertion record must be freed by now */
+       Assert(!specinsert);
 
        /* clean up the iterator */
        ReorderBufferIterTXNFinish(rb, iterstate);
@@ -3754,6 +3776,7 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
                break;
            }
        case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
+       case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
        case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
        case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
            /* ReorderBufferChange contains everything important */
@@ -4017,6 +4040,7 @@ ReorderBufferChangeSize(ReorderBufferChange *change)
                break;
            }
        case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
+       case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
        case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
        case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
            /* ReorderBufferChange contains everything important */
@@ -4315,6 +4339,7 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
                break;
            }
        case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
+       case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT:
        case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
        case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
            break;
index 0c6e9d1cb924d139626c7bbc0fd4d4ec2d996f88..ba257d81b511aabf32d974cf5198ddd4372de595 100644 (file)
@@ -46,10 +46,10 @@ typedef struct ReorderBufferTupleBuf
  * changes. Users of the decoding facilities will never see changes with
  * *_INTERNAL_* actions.
  *
- * The INTERNAL_SPEC_INSERT and INTERNAL_SPEC_CONFIRM changes concern
- * "speculative insertions", and their confirmation respectively.  They're
- * used by INSERT .. ON CONFLICT .. UPDATE.  Users of logical decoding don't
- * have to care about these.
+ * The INTERNAL_SPEC_INSERT and INTERNAL_SPEC_CONFIRM, and INTERNAL_SPEC_ABORT
+ * changes concern "speculative insertions", their confirmation, and abort
+ * respectively.  They're used by INSERT .. ON CONFLICT .. UPDATE.  Users of
+ * logical decoding don't have to care about these.
  */
 enum ReorderBufferChangeType
 {
@@ -63,6 +63,7 @@ enum ReorderBufferChangeType
    REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID,
    REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT,
    REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM,
+   REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT,
    REORDER_BUFFER_CHANGE_TRUNCATE
 };