summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorAndres Freund2018-04-07 20:24:10 +0000
committerAndres Freund2018-04-07 20:24:27 +0000
commitf16241bef7cc271bff60e23de2f827a10e50dde8 (patch)
tree3e35912975c9be3038ce3b35625d21763979a313 /src/include
parent8224de4f42ccf98e08db07b43d52fed72f962ebb (diff)
Raise error when affecting tuple moved into different partition.
When an update moves a row between partitions (supported since 2f178441044b), our normal logic for following update chains in READ COMMITTED mode doesn't work anymore. Cross partition updates are modeled as an delete from the old and insert into the new partition. No ctid chain exists across partitions, and there's no convenient space to introduce that link. Not throwing an error in a partitioned context when one would have been thrown without partitioning is obviously problematic. This commit introduces infrastructure to detect when a tuple has been moved, not just plainly deleted. That allows to throw an error when encountering a deletion that's actually a move, while attempting to following a ctid chain. The row deleted as part of a cross partition update is marked by pointing it's t_ctid to an invalid block, instead of self as a normal update would. That was deemed to be the least invasive and most future proof way to represent the knowledge, given how few infomask bits are there to be recycled (there's also some locking issues with using infomask bits). External code following ctid chains should be updated to check for moved tuples. The most likely consequence of not doing so is a missed error. Author: Amul Sul, editorialized by me Reviewed-By: Amit Kapila, Pavan Deolasee, Andres Freund, Robert Haas Discussion: http://postgr.es/m/CAAJ_b95PkwojoYfz0bzXU8OokcTVGzN6vYGCNVUukeUDrnF3dw@mail.gmail.com
Diffstat (limited to 'src/include')
-rw-r--r--src/include/access/heapam.h2
-rw-r--r--src/include/access/heapam_xlog.h1
-rw-r--r--src/include/access/htup_details.h12
-rw-r--r--src/include/executor/nodeModifyTable.h3
-rw-r--r--src/include/storage/itemptr.h16
5 files changed, 30 insertions, 4 deletions
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 608f50b0616..7d756f20b08 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -167,7 +167,7 @@ extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
CommandId cid, int options, BulkInsertState bistate);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd);
+ HeapUpdateFailureData *hufd, bool changingPart);
extern void heap_finish_speculative(Relation relation, HeapTuple tuple);
extern void heap_abort_speculative(Relation relation, HeapTuple tuple);
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 0052e4c569a..cf88ff7cb44 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -93,6 +93,7 @@
#define XLH_DELETE_CONTAINS_OLD_TUPLE (1<<1)
#define XLH_DELETE_CONTAINS_OLD_KEY (1<<2)
#define XLH_DELETE_IS_SUPER (1<<3)
+#define XLH_DELETE_IS_PARTITION_MOVE (1<<4)
/* convenience macro for checking whether any form of old tuple was logged */
#define XLH_DELETE_CONTAINS_OLD \
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index cebaea097d1..cf56d4ace43 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -83,8 +83,10 @@
*
* A word about t_ctid: whenever a new tuple is stored on disk, its t_ctid
* is initialized with its own TID (location). If the tuple is ever updated,
- * its t_ctid is changed to point to the replacement version of the tuple.
- * Thus, a tuple is the latest version of its row iff XMAX is invalid or
+ * its t_ctid is changed to point to the replacement version of the tuple or
+ * the block number (ip_blkid) is invalidated if the tuple is moved from one
+ * partition to another partition relation due to an update of the partition
+ * key. Thus, a tuple is the latest version of its row iff XMAX is invalid or
* t_ctid points to itself (in which case, if XMAX is valid, the tuple is
* either locked or deleted). One can follow the chain of t_ctid links
* to find the newest version of the row. Beware however that VACUUM might
@@ -445,6 +447,12 @@ do { \
ItemPointerSet(&(tup)->t_ctid, token, SpecTokenOffsetNumber) \
)
+#define HeapTupleHeaderSetMovedPartitions(tup) \
+ ItemPointerSetMovedPartitions(&(tup)->t_ctid)
+
+#define HeapTupleHeaderIndicatesMovedPartitions(tup) \
+ ItemPointerIndicatesMovedPartitions(&tup->t_ctid)
+
#define HeapTupleHeaderGetDatumLength(tup) \
VARSIZE(tup)
diff --git a/src/include/executor/nodeModifyTable.h b/src/include/executor/nodeModifyTable.h
index 94fd60c38cf..7e9ab3cb6b4 100644
--- a/src/include/executor/nodeModifyTable.h
+++ b/src/include/executor/nodeModifyTable.h
@@ -27,7 +27,8 @@ extern TupleTableSlot *ExecDelete(ModifyTableState *mtstate,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *planSlot,
EPQState *epqstate, EState *estate, bool *tupleDeleted,
bool processReturning, HeapUpdateFailureData *hufdp,
- MergeActionState *actionState, bool canSetTag);
+ MergeActionState *actionState, bool canSetTag,
+ bool changingPart);
extern TupleTableSlot *ExecUpdate(ModifyTableState *mtstate,
ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot,
TupleTableSlot *planSlot, EPQState *epqstate, EState *estate,
diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h
index 6c9ed3696b7..626c98f9691 100644
--- a/src/include/storage/itemptr.h
+++ b/src/include/storage/itemptr.h
@@ -154,6 +154,22 @@ typedef ItemPointerData *ItemPointer;
(pointer)->ip_posid = InvalidOffsetNumber \
)
+/*
+ * ItemPointerIndicatesMovedPartitions
+ * True iff the block number indicates the tuple has moved to another
+ * partition.
+ */
+#define ItemPointerIndicatesMovedPartitions(pointer) \
+ !BlockNumberIsValid(ItemPointerGetBlockNumberNoCheck(pointer))
+
+/*
+ * ItemPointerSetMovedPartitions
+ * Indicate that the item referenced by the itempointer has moved into a
+ * different partition.
+ */
+#define ItemPointerSetMovedPartitions(pointer) \
+ ItemPointerSetBlockNumber((pointer), InvalidBlockNumber)
+
/* ----------------
* externs
* ----------------