summary | refs | log | tree | commit | diff
path: root/src/include
diff options
context:
space:
mode:
author Alvaro Herrera 2013-01-23 15:04:59 +0000
committer Alvaro Herrera 2013-01-23 15:04:59 +0000
commit 0ac5ad5134f2769ccbaefec73844f8504c4d6182 (patch)
tree d9b0ba4a1b65a52030820efe68a9c937c46aad1f /src/include
parent f925c79b9f36c54b67053ade5ad225a75b8dc803 (diff)
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each other, in contrast with already existing "SELECT FOR SHARE" and "SELECT FOR UPDATE". UPDATE commands that do not modify the values stored in the columns that are part of the key of the tuple now grab a SELECT FOR NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently with tuple locks of the FOR KEY SHARE variety. Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this means the concurrency improvement applies to them, which is the whole point of this patch. The added tuple lock semantics require some rejiggering of the multixact module, so that the locking level that each transaction is holding can be stored alongside its Xid. Also, multixacts now need to persist across server restarts and crashes, because they can now represent not only tuple locks, but also tuple updates. This means we need more careful tracking of lifetime of pg_multixact SLRU files; since they now persist longer, we require more infrastructure to figure out when they can be removed. pg_upgrade also needs to be careful to copy pg_multixact files over from the old server to the new, or at least part of multixact.c state, depending on the versions of the old and new servers. Tuple time qualification rules (HeapTupleSatisfies routines) need to be careful not to consider tuples with the "is multi" infomask bit set as being only locked; they might need to look up MultiXact values (i.e. possibly do pg_multixact I/O) to find out the Xid that updated a tuple, whereas they previously were assured to only use information readily available from the tuple header. This is considered acceptable, because the extra I/O would involve cases that would previously cause some commands to block waiting for concurrent transactions to finish. 
Another important change is the fact that locking tuples that have previously been updated causes the future versions to be marked as locked, too; this is essential for correctness of foreign key checks. This also causes additional WAL-logging (there was previously a single WAL record for a locked tuple; now there is one for each updated copy of the tuple that exists). With all this in place, contention related to tuples being checked by foreign key rules should be much reduced. As a bonus, the old behavior, in which a subtransaction grabbing a stronger tuple lock than the parent (sub)transaction held on a given tuple and later aborting caused the weaker lock to be lost, has been fixed. Many new spec files were added for the isolation tester framework, to ensure overall behavior is sane. There's probably room for several more tests. There were several reviewers of this patch; in particular, Noah Misch and Andres Freund spent considerable time on it. The original idea for the patch came from Simon Riggs, after a problem report by Joel Jacobson. Most code is from me, with contributions from Marti Raudsepp, Alexander Shulgin, Noah Misch and Andres Freund. This patch was discussed in several pgsql-hackers threads; the most important start at the following message-ids: AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com 1290721684-sup-3951@alvh.no-ip.org 1294953201-sup-2099@alvh.no-ip.org 1320343602-sup-2290@alvh.no-ip.org 1339690386-sup-8927@alvh.no-ip.org 4FE5FF020200002500048A3D@gw.wicourts.gov 4FEAB90A0200002500048B7D@gw.wicourts.gov
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/access/heapam.h 23
-rw-r--r-- src/include/access/heapam_xlog.h 33
-rw-r--r-- src/include/access/htup.h 6
-rw-r--r-- src/include/access/htup_details.h 62
-rw-r--r-- src/include/access/multixact.h 66
-rw-r--r-- src/include/access/rewriteheap.h 2
-rw-r--r-- src/include/catalog/catversion.h 2
-rw-r--r-- src/include/catalog/pg_class.h 24
-rw-r--r-- src/include/catalog/pg_control.h 4
-rw-r--r-- src/include/catalog/pg_database.h 10
-rw-r--r-- src/include/catalog/pg_proc.h 2
-rw-r--r-- src/include/commands/cluster.h 3
-rw-r--r-- src/include/commands/vacuum.h 6
-rw-r--r-- src/include/executor/executor.h 2
-rw-r--r-- src/include/nodes/execnodes.h 8
-rw-r--r-- src/include/nodes/parsenodes.h 36
-rw-r--r-- src/include/nodes/plannodes.h 12
-rw-r--r-- src/include/parser/analyze.h 2
-rw-r--r-- src/include/postgres.h 7
-rw-r--r-- src/include/storage/lock.h 1
-rw-r--r-- src/include/utils/builtins.h 3
-rw-r--r-- src/include/utils/rel.h 1
-rw-r--r-- src/include/utils/relcache.h 4
-rw-r--r-- src/include/utils/tqual.h 1
24 files changed, 245 insertions, 75 deletions
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index c737b3ff28..af9e506d2b 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -30,12 +30,23 @@
typedef struct BulkInsertStateData *BulkInsertState;
-typedef enum
+/*
+ * Possible lock modes for a tuple.
+ */
+typedef enum LockTupleMode
{
- LockTupleShared,
+ /* SELECT FOR KEY SHARE */
+ LockTupleKeyShare,
+ /* SELECT FOR SHARE */
+ LockTupleShare,
+ /* SELECT FOR NO KEY UPDATE, and UPDATEs that don't modify key columns */
+ LockTupleNoKeyExclusive,
+ /* SELECT FOR UPDATE, UPDATEs that modify key columns, and DELETE */
LockTupleExclusive
} LockTupleMode;
+#define MaxLockTupleMode LockTupleExclusive
+
/*
* When heap_update, heap_delete, or heap_lock_tuple fail because the target
* tuple is already outdated, they fill in this struct to provide information
@@ -129,14 +140,16 @@ extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
- HeapUpdateFailureData *hufd);
+ HeapUpdateFailureData *hufd, LockTupleMode *lockmode);
extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
CommandId cid, LockTupleMode mode, bool nowait,
+ bool follow_update,
Buffer *buffer, HeapUpdateFailureData *hufd);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
-extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid);
+extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ TransactionId cutoff_multi);
extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
- Buffer buf);
+ MultiXactId cutoff_multi, Buffer buf);
extern Oid simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, ItemPointer tid);
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 9db6953720..270924a01a 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -54,6 +54,7 @@
#define XLOG_HEAP2_CLEANUP_INFO 0x30
#define XLOG_HEAP2_VISIBLE 0x40
#define XLOG_HEAP2_MULTI_INSERT 0x50
+#define XLOG_HEAP2_LOCK_UPDATED 0x60
/*
* All what we need to find changed tuple
@@ -75,6 +76,8 @@ typedef struct xl_heaptid
typedef struct xl_heap_delete
{
xl_heaptid target; /* deleted tuple id */
+ TransactionId xmax; /* xmax of the deleted tuple */
+ uint8 infobits_set; /* infomask bits */
bool all_visible_cleared; /* PD_ALL_VISIBLE was cleared */
} xl_heap_delete;
@@ -141,7 +144,10 @@ typedef struct xl_multi_insert_tuple
typedef struct xl_heap_update
{
xl_heaptid target; /* deleted tuple id */
+ TransactionId old_xmax; /* xmax of the old tuple */
+ TransactionId new_xmax; /* xmax of the new tuple */
ItemPointerData newtid; /* new inserted tuple id */
+ uint8 old_infobits_set; /* infomask bits to set on old tuple */
bool all_visible_cleared; /* PD_ALL_VISIBLE was cleared */
bool new_all_visible_cleared; /* same for the page of newtid */
/* NEW TUPLE xl_heap_header AND TUPLE DATA FOLLOWS AT END OF STRUCT */
@@ -197,16 +203,32 @@ typedef struct xl_heap_newpage
#define SizeOfHeapNewpage (offsetof(xl_heap_newpage, blkno) + sizeof(BlockNumber))
+/* flags for infobits_set */
+#define XLHL_XMAX_IS_MULTI 0x01
+#define XLHL_XMAX_LOCK_ONLY 0x02
+#define XLHL_XMAX_EXCL_LOCK 0x04
+#define XLHL_XMAX_KEYSHR_LOCK 0x08
+#define XLHL_KEYS_UPDATED 0x10
+
/* This is what we need to know about lock */
typedef struct xl_heap_lock
{
xl_heaptid target; /* locked tuple id */
TransactionId locking_xid; /* might be a MultiXactId not xid */
- bool xid_is_mxact; /* is it? */
- bool shared_lock; /* shared or exclusive row lock? */
+ int8 infobits_set; /* infomask and infomask2 bits to set */
} xl_heap_lock;
-#define SizeOfHeapLock (offsetof(xl_heap_lock, shared_lock) + sizeof(bool))
+#define SizeOfHeapLock (offsetof(xl_heap_lock, infobits_set) + sizeof(int8))
+
+/* This is what we need to know about locking an updated version of a row */
+typedef struct xl_heap_lock_updated
+{
+ xl_heaptid target;
+ TransactionId xmax;
+ uint8 infobits_set;
+} xl_heap_lock_updated;
+
+#define SizeOfHeapLockUpdated (offsetof(xl_heap_lock_updated, infobits_set) + sizeof(uint8))
/* This is what we need to know about in-place update */
typedef struct xl_heap_inplace
@@ -223,10 +245,11 @@ typedef struct xl_heap_freeze
RelFileNode node;
BlockNumber block;
TransactionId cutoff_xid;
+ MultiXactId cutoff_multi;
/* TUPLE OFFSET NUMBERS FOLLOW AT THE END */
} xl_heap_freeze;
-#define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_xid) + sizeof(TransactionId))
+#define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_multi) + sizeof(MultiXactId))
/* This is what we need to know about setting a visibility map bit */
typedef struct xl_heap_visible
@@ -254,7 +277,7 @@ extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
OffsetNumber *nowunused, int nunused,
TransactionId latestRemovedXid);
extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
- TransactionId cutoff_xid,
+ TransactionId cutoff_xid, MultiXactId cutoff_multi,
OffsetNumber *offsets, int offcnt);
extern XLogRecPtr log_heap_visible(RelFileNode rnode, BlockNumber block,
Buffer vm_buffer, TransactionId cutoff_xid);
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index 9cd4b88ed4..79e3c50ef1 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -80,7 +80,9 @@ typedef HeapTupleData *HeapTuple;
extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup);
extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup);
extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
- CommandId *cmax,
- bool *iscombo);
+ CommandId *cmax, bool *iscombo);
+
+/* Prototype for HeapTupleHeader accessors in heapam.c */
+extern TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple);
#endif /* HTUP_H */
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index aeab45bb97..6a28d8ed74 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -162,12 +162,16 @@ struct HeapTupleHeaderData
#define HEAP_HASVARWIDTH 0x0002 /* has variable-width attribute(s) */
#define HEAP_HASEXTERNAL 0x0004 /* has external stored attribute(s) */
#define HEAP_HASOID 0x0008 /* has an object-id field */
-/* bit 0x0010 is available */
+#define HEAP_XMAX_KEYSHR_LOCK 0x0010 /* xmax is a key-shared locker */
#define HEAP_COMBOCID 0x0020 /* t_cid is a combo cid */
#define HEAP_XMAX_EXCL_LOCK 0x0040 /* xmax is exclusive locker */
-#define HEAP_XMAX_SHARED_LOCK 0x0080 /* xmax is shared locker */
-/* if either LOCK bit is set, xmax hasn't deleted the tuple, only locked it */
-#define HEAP_IS_LOCKED (HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_SHARED_LOCK)
+#define HEAP_XMAX_LOCK_ONLY 0x0080 /* xmax, if valid, is only a locker */
+
+ /* xmax is a shared locker */
+#define HEAP_XMAX_SHR_LOCK (HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_KEYSHR_LOCK)
+
+#define HEAP_LOCK_MASK (HEAP_XMAX_SHR_LOCK | HEAP_XMAX_EXCL_LOCK | \
+ HEAP_XMAX_KEYSHR_LOCK)
#define HEAP_XMIN_COMMITTED 0x0100 /* t_xmin committed */
#define HEAP_XMIN_INVALID 0x0200 /* t_xmin invalid/aborted */
#define HEAP_XMAX_COMMITTED 0x0400 /* t_xmax committed */
@@ -182,17 +186,42 @@ struct HeapTupleHeaderData
* upgrade support */
#define HEAP_MOVED (HEAP_MOVED_OFF | HEAP_MOVED_IN)
-#define HEAP_XACT_MASK 0xFFE0 /* visibility-related bits */
+#define HEAP_XACT_MASK 0xFFF0 /* visibility-related bits */
+
+/*
+ * A tuple is only locked (i.e. not updated by its Xmax) if the
+ * HEAP_XMAX_LOCK_ONLY bit is set.
+ *
+ * See also HeapTupleHeaderIsOnlyLocked, which also checks for a possible
+ * aborted updater transaction.
+ */
+#define HEAP_XMAX_IS_LOCKED_ONLY(infomask) \
+ ((infomask) & HEAP_XMAX_LOCK_ONLY)
+/*
+ * Use these to test whether a particular lock is applied to a tuple
+ */
+#define HEAP_XMAX_IS_SHR_LOCKED(infomask) \
+ (((infomask) & HEAP_LOCK_MASK) == HEAP_XMAX_SHR_LOCK)
+#define HEAP_XMAX_IS_EXCL_LOCKED(infomask) \
+ (((infomask) & HEAP_LOCK_MASK) == HEAP_XMAX_EXCL_LOCK)
+#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) \
+ (((infomask) & HEAP_LOCK_MASK) == HEAP_XMAX_KEYSHR_LOCK)
+
+/* turn these all off when Xmax is to change */
+#define HEAP_XMAX_BITS (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID | \
+ HEAP_XMAX_IS_MULTI | HEAP_LOCK_MASK | HEAP_XMAX_LOCK_ONLY)
/*
* information stored in t_infomask2:
*/
#define HEAP_NATTS_MASK 0x07FF /* 11 bits for number of attributes */
-/* bits 0x3800 are available */
+/* bits 0x1800 are available */
+#define HEAP_KEYS_UPDATED 0x2000 /* tuple was updated and key cols
+ * modified, or tuple deleted */
#define HEAP_HOT_UPDATED 0x4000 /* tuple was HOT-updated */
#define HEAP_ONLY_TUPLE 0x8000 /* this is heap-only tuple */
-#define HEAP2_XACT_MASK 0xC000 /* visibility-related bits */
+#define HEAP2_XACT_MASK 0xE000 /* visibility-related bits */
/*
* HEAP_TUPLE_HAS_MATCH is a temporary flag used during hash joins. It is
@@ -219,7 +248,24 @@ struct HeapTupleHeaderData
(tup)->t_choice.t_heap.t_xmin = (xid) \
)
-#define HeapTupleHeaderGetXmax(tup) \
+/*
+ * HeapTupleHeaderGetRawXmax gets you the raw Xmax field. To find out the Xid
+ * that updated a tuple, you might need to resolve the MultiXactId if certain
+ * bits are set. HeapTupleHeaderGetUpdateXid checks those bits and takes care
+ * to resolve the MultiXactId if necessary. This might involve multixact I/O,
+ * so it should only be used if absolutely necessary.
+ */
+#define HeapTupleHeaderGetUpdateXid(tup) \
+( \
+ (!((tup)->t_infomask & HEAP_XMAX_INVALID) && \
+ ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \
+ !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \
+ HeapTupleGetUpdateXid(tup) \
+ : \
+ HeapTupleHeaderGetRawXmax(tup) \
+)
+
+#define HeapTupleHeaderGetRawXmax(tup) \
( \
(tup)->t_choice.t_heap.t_xmax \
)
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index b5486bec09..b08bb1f49a 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -13,8 +13,15 @@
#include "access/xlog.h"
+
+/*
+ * The first two MultiXactId values are reserved to store the truncation Xid
+ * and epoch of the first segment, so we start assigning multixact values from
+ * 2.
+ */
#define InvalidMultiXactId ((MultiXactId) 0)
#define FirstMultiXactId ((MultiXactId) 1)
+#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF)
#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
@@ -22,6 +29,33 @@
#define NUM_MXACTOFFSET_BUFFERS 8
#define NUM_MXACTMEMBER_BUFFERS 16
+/*
+ * Possible multixact lock modes ("status"). The first four modes are for
+ * tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
+ * next two are used for update and delete modes.
+ */
+typedef enum
+{
+ MultiXactStatusForKeyShare = 0x00,
+ MultiXactStatusForShare = 0x01,
+ MultiXactStatusForNoKeyUpdate = 0x02,
+ MultiXactStatusForUpdate = 0x03,
+ /* an update that doesn't touch "key" columns */
+ MultiXactStatusNoKeyUpdate = 0x04,
+ /* other updates, and delete */
+ MultiXactStatusUpdate = 0x05
+} MultiXactStatus;
+
+#define MaxMultiXactStatus MultiXactStatusUpdate
+
+
+typedef struct MultiXactMember
+{
+ TransactionId xid;
+ MultiXactStatus status;
+} MultiXactMember;
+
+
/* ----------------
* multixact-related XLOG entries
* ----------------
@@ -35,21 +69,24 @@ typedef struct xl_multixact_create
{
MultiXactId mid; /* new MultiXact's ID */
MultiXactOffset moff; /* its starting offset in members file */
- int32 nxids; /* number of member XIDs */
- TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */
+ int32 nmembers; /* number of member XIDs */
+ MultiXactMember members[FLEXIBLE_ARRAY_MEMBER];
} xl_multixact_create;
-#define MinSizeOfMultiXactCreate offsetof(xl_multixact_create, xids)
+#define SizeOfMultiXactCreate (offsetof(xl_multixact_create, members))
-extern MultiXactId MultiXactIdCreate(TransactionId xid1, TransactionId xid2);
-extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid);
+extern MultiXactId MultiXactIdCreate(TransactionId xid1,
+ MultiXactStatus status1, TransactionId xid2,
+ MultiXactStatus status2);
+extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid,
+ MultiXactStatus status);
+extern MultiXactId ReadNextMultiXactId(void);
extern bool MultiXactIdIsRunning(MultiXactId multi);
-extern bool MultiXactIdIsCurrent(MultiXactId multi);
-extern void MultiXactIdWait(MultiXactId multi);
-extern bool ConditionalMultiXactIdWait(MultiXactId multi);
extern void MultiXactIdSetOldestMember(void);
-extern int GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids);
+extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **xids,
+ bool allow_old);
+extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
extern void AtEOXact_MultiXact(void);
extern void AtPrepare_MultiXact(void);
@@ -60,14 +97,21 @@ extern void MultiXactShmemInit(void);
extern void BootStrapMultiXact(void);
extern void StartupMultiXact(void);
extern void ShutdownMultiXact(void);
+extern void SetMultiXactIdLimit(MultiXactId oldest_datminmxid,
+ Oid oldest_datoid);
extern void MultiXactGetCheckptMulti(bool is_shutdown,
MultiXactId *nextMulti,
- MultiXactOffset *nextMultiOffset);
+ MultiXactOffset *nextMultiOffset,
+ MultiXactId *oldestMulti,
+ Oid *oldestMultiDB);
extern void CheckPointMultiXact(void);
+extern MultiXactId GetOldestMultiXactId(void);
+extern void TruncateMultiXact(MultiXactId cutoff_multi);
extern void MultiXactSetNextMXact(MultiXactId nextMulti,
MultiXactOffset nextMultiOffset);
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
+extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
extern void multixact_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
@@ -78,5 +122,7 @@ extern void multixact_twophase_postabort(TransactionId xid, uint16 info,
extern void multixact_redo(XLogRecPtr lsn, XLogRecord *record);
extern void multixact_desc(StringInfo buf, uint8 xl_info, char *rec);
+extern char *mxid_to_string(MultiXactId multi, int nmembers,
+ MultiXactMember *members);
#endif /* MULTIXACT_H */
diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h
index 13b991a8b1..f82d1f5734 100644
--- a/src/include/access/rewriteheap.h
+++ b/src/include/access/rewriteheap.h
@@ -21,7 +21,7 @@ typedef struct RewriteStateData *RewriteState;
extern RewriteState begin_heap_rewrite(Relation NewHeap,
TransactionId OldestXmin, TransactionId FreezeXid,
- bool use_wal);
+ MultiXactId MultiXactFrzLimit, bool use_wal);
extern void end_heap_rewrite(RewriteState state);
extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
HeapTuple newTuple);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index a676793566..4b8fa0175b 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201301211
+#define CATALOG_VERSION_NO 201301231
#endif
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index fcc293899a..820552f013 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -67,6 +67,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
bool relhastriggers; /* has (or has had) any TRIGGERs */
bool relhassubclass; /* has (or has had) derived classes */
TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */
+ TransactionId relminmxid; /* all multixacts in this rel are >= this.
+ * this is really a MultiXactId */
#ifdef CATALOG_VARLEN /* variable-length fields start here */
/* NOTE: These fields are not present in a relcache entry's rd_rel field. */
@@ -77,7 +79,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
/* Size of fixed part of pg_class tuples, not counting var-length fields */
#define CLASS_TUPLE_SIZE \
- (offsetof(FormData_pg_class,relfrozenxid) + sizeof(TransactionId))
+ (offsetof(FormData_pg_class,relminmxid) + sizeof(TransactionId))
/* ----------------
* Form_pg_class corresponds to a pointer to a tuple with
@@ -91,7 +93,7 @@ typedef FormData_pg_class *Form_pg_class;
* ----------------
*/
-#define Natts_pg_class 27
+#define Natts_pg_class 28
#define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3
@@ -117,8 +119,9 @@ typedef FormData_pg_class *Form_pg_class;
#define Anum_pg_class_relhastriggers 23
#define Anum_pg_class_relhassubclass 24
#define Anum_pg_class_relfrozenxid 25
-#define Anum_pg_class_relacl 26
-#define Anum_pg_class_reloptions 27
+#define Anum_pg_class_relminmxid 26
+#define Anum_pg_class_relacl 27
+#define Anum_pg_class_reloptions 28
/* ----------------
* initial contents of pg_class
@@ -129,14 +132,17 @@ typedef FormData_pg_class *Form_pg_class;
* ----------------
*/
-/* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
-DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 _null_ _null_ ));
+/*
+ * Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId;
+ * similarly, "1" in relminmxid stands for FirstMultiXactId
+ */
+DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f 3 1 _null_ _null_ ));
DESCR("");
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index ead3a6e4ba..e4a9abe7bc 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -21,7 +21,7 @@
/* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION 932
+#define PG_CONTROL_VERSION 933
/*
* Body of CheckPoint XLOG records. This is declared here because we keep
@@ -41,6 +41,8 @@ typedef struct CheckPoint
MultiXactOffset nextMultiOffset; /* next free MultiXact offset */
TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */
Oid oldestXidDB; /* database with minimum datfrozenxid */
+ MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */
+ Oid oldestMultiDB; /* database with minimum datminmxid */
pg_time_t time; /* time stamp of checkpoint */
/*
diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h
index 4010959b02..baeddcd12a 100644
--- a/src/include/catalog/pg_database.h
+++ b/src/include/catalog/pg_database.h
@@ -41,6 +41,7 @@ CATALOG(pg_database,1262) BKI_SHARED_RELATION BKI_ROWTYPE_OID(1248) BKI_SCHEMA_M
int32 datconnlimit; /* max connections allowed (-1=no limit) */
Oid datlastsysoid; /* highest OID to consider a system OID */
TransactionId datfrozenxid; /* all Xids < this are frozen in this DB */
+ TransactionId datminmxid; /* all multixacts in the DB are >= this */
Oid dattablespace; /* default table space for this DB */
#ifdef CATALOG_VARLEN /* variable-length fields start here */
@@ -59,7 +60,7 @@ typedef FormData_pg_database *Form_pg_database;
* compiler constants for pg_database
* ----------------
*/
-#define Natts_pg_database 12
+#define Natts_pg_database 13
#define Anum_pg_database_datname 1
#define Anum_pg_database_datdba 2
#define Anum_pg_database_encoding 3
@@ -70,10 +71,11 @@ typedef FormData_pg_database *Form_pg_database;
#define Anum_pg_database_datconnlimit 8
#define Anum_pg_database_datlastsysoid 9
#define Anum_pg_database_datfrozenxid 10
-#define Anum_pg_database_dattablespace 11
-#define Anum_pg_database_datacl 12
+#define Anum_pg_database_datminmxid 11
+#define Anum_pg_database_dattablespace 12
+#define Anum_pg_database_datacl 13
-DATA(insert OID = 1 ( template1 PGUID ENCODING "LC_COLLATE" "LC_CTYPE" t t -1 0 0 1663 _null_));
+DATA(insert OID = 1 ( template1 PGUID ENCODING "LC_COLLATE" "LC_CTYPE" t t -1 0 0 1 1663 _null_));
SHDESCR("default template for new databases");
#define TemplateDbOid 1
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 010605d774..028e1684ff 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2909,6 +2909,8 @@ DATA(insert OID = 1371 ( pg_lock_status PGNSP PGUID 12 1 1000 0 0 f f f f t t
DESCR("view system lock information");
DATA(insert OID = 1065 ( pg_prepared_xact PGNSP PGUID 12 1 1000 0 0 f f f f t t v 0 0 2249 "" "{28,25,1184,26,26}" "{o,o,o,o,o}" "{transaction,gid,prepared,ownerid,dbid}" _null_ pg_prepared_xact _null_ _null_ _null_ ));
DESCR("view two-phase transactions");
+DATA(insert OID = 3819 ( pg_get_multixact_members PGNSP PGUID 12 1 1000 0 0 f f f f t t v 1 0 2249 "28" "{28,28,25}" "{i,o,o}" "{multixid,xid,mode}" _null_ pg_get_multixact_members _null_ _null_ _null_ ));
+DESCR("view members of a multixactid");
DATA(insert OID = 3537 ( pg_describe_object PGNSP PGUID 12 1 0 0 0 f f f f t f s 3 0 25 "26 26 23" _null_ _null_ _null_ _null_ pg_describe_object _null_ _null_ _null_ ));
DESCR("get identification of SQL object");
diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h
index 532c31c11b..73c701fe53 100644
--- a/src/include/commands/cluster.h
+++ b/src/include/commands/cluster.h
@@ -30,6 +30,7 @@ extern void finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
bool is_system_catalog,
bool swap_toast_by_content,
bool check_constraints,
- TransactionId frozenXid);
+ TransactionId frozenXid,
+ MultiXactId frozenMulti);
#endif /* CLUSTER_H */
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index f70442af4a..d8dd8b04ed 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -153,12 +153,14 @@ extern void vac_update_relstats(Relation relation,
double num_tuples,
BlockNumber num_all_visible_pages,
bool hasindex,
- TransactionId frozenxid);
+ TransactionId frozenxid,
+ MultiXactId minmulti);
extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit,
- TransactionId *freezeTableLimit);
+ TransactionId *freezeTableLimit,
+ MultiXactId *multiXactFrzLimit);
extern void vac_update_datfrozenxid(void);
extern void vacuum_delay_point(void);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 5046893866..b1213a0635 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -193,7 +193,7 @@ extern void ExecConstraints(ResultRelInfo *resultRelInfo,
extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti);
extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
- Relation relation, Index rti,
+ Relation relation, Index rti, int lockmode,
ItemPointer tid, TransactionId priorXmax);
extern HeapTuple EvalPlanQualFetch(EState *estate, Relation relation,
int lockmode, ItemPointer tid, TransactionId priorXmax);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index b23989e19f..76e8cdb1ad 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -403,9 +403,9 @@ typedef struct EState
/*
* ExecRowMark -
- * runtime representation of FOR UPDATE/SHARE clauses
+ * runtime representation of FOR [KEY] UPDATE/SHARE clauses
*
- * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we should have an
+ * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we should have an
* ExecRowMark for each non-target relation in the query (except inheritance
* parent RTEs, which can be ignored at runtime). See PlanRowMark for details
* about most of the fields. In addition to fields directly derived from
@@ -426,7 +426,7 @@ typedef struct ExecRowMark
/*
* ExecAuxRowMark -
- * additional runtime representation of FOR UPDATE/SHARE clauses
+ * additional runtime representation of FOR [KEY] UPDATE/SHARE clauses
*
* Each LockRows and ModifyTable node keeps a list of the rowmarks it needs to
* deal with. In addition to a pointer to the related entry in es_rowMarks,
@@ -1824,7 +1824,7 @@ typedef struct SetOpState
/* ----------------
* LockRowsState information
*
- * LockRows nodes are used to enforce FOR UPDATE/FOR SHARE locking.
+ * LockRows nodes are used to enforce FOR [KEY] UPDATE/SHARE locking.
* ----------------
*/
typedef struct LockRowsState
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 56cf592e0c..d8678e5b3f 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -74,7 +74,7 @@ typedef uint32 AclMode; /* a bitmask of privilege bits */
#define ACL_CONNECT (1<<11) /* for databases */
#define N_ACL_RIGHTS 12 /* 1 plus the last 1<<x */
#define ACL_NO_RIGHTS 0
-/* Currently, SELECT ... FOR UPDATE/FOR SHARE requires UPDATE privileges */
+/* Currently, SELECT ... FOR [KEY] UPDATE/SHARE requires UPDATE privileges */
#define ACL_SELECT_FOR_UPDATE ACL_UPDATE
@@ -119,7 +119,7 @@ typedef struct Query
bool hasDistinctOn; /* distinctClause is from DISTINCT ON */
bool hasRecursive; /* WITH RECURSIVE was specified */
bool hasModifyingCTE; /* has INSERT/UPDATE/DELETE in WITH */
- bool hasForUpdate; /* FOR UPDATE or FOR SHARE was specified */
+ bool hasForUpdate; /* FOR [KEY] UPDATE/SHARE was specified */
List *cteList; /* WITH list (of CommonTableExpr's) */
@@ -572,18 +572,28 @@ typedef struct DefElem
} DefElem;
/*
- * LockingClause - raw representation of FOR UPDATE/SHARE options
+ * LockingClause - raw representation of FOR [NO KEY] UPDATE/[KEY] SHARE
+ * options
*
* Note: lockedRels == NIL means "all relations in query". Otherwise it
* is a list of RangeVar nodes. (We use RangeVar mainly because it carries
* a location field --- currently, parse analysis insists on unqualified
* names in LockingClause.)
*/
+typedef enum LockClauseStrength
+{
+ /* order is important -- see applyLockingClause */
+ LCS_FORKEYSHARE,
+ LCS_FORSHARE,
+ LCS_FORNOKEYUPDATE,
+ LCS_FORUPDATE
+} LockClauseStrength;
+
typedef struct LockingClause
{
NodeTag type;
- List *lockedRels; /* FOR UPDATE or FOR SHARE relations */
- bool forUpdate; /* true = FOR UPDATE, false = FOR SHARE */
+ List *lockedRels; /* FOR [KEY] UPDATE/SHARE relations */
+ LockClauseStrength strength;
bool noWait; /* NOWAIT option */
} LockingClause;
@@ -865,21 +875,21 @@ typedef struct WindowClause
/*
* RowMarkClause -
- * parser output representation of FOR UPDATE/SHARE clauses
+ * parser output representation of FOR [KEY] UPDATE/SHARE clauses
*
* Query.rowMarks contains a separate RowMarkClause node for each relation
- * identified as a FOR UPDATE/SHARE target. If FOR UPDATE/SHARE is applied
- * to a subquery, we generate RowMarkClauses for all normal and subquery rels
- * in the subquery, but they are marked pushedDown = true to distinguish them
- * from clauses that were explicitly written at this query level. Also,
- * Query.hasForUpdate tells whether there were explicit FOR UPDATE/SHARE
- * clauses in the current query level.
+ * identified as a FOR [KEY] UPDATE/SHARE target. If one of these clauses
+ * is applied to a subquery, we generate RowMarkClauses for all normal and
+ * subquery rels in the subquery, but they are marked pushedDown = true to
+ * distinguish them from clauses that were explicitly written at this query
+ * level. Also, Query.hasForUpdate tells whether there were explicit FOR
+ * [KEY] UPDATE/SHARE clauses in the current query level.
*/
typedef struct RowMarkClause
{
NodeTag type;
Index rti; /* range table index of target relation */
- bool forUpdate; /* true = FOR UPDATE, false = FOR SHARE */
+ LockClauseStrength strength; /* requested lock mode, see LockClauseStrength */
bool noWait; /* NOWAIT option */
bool pushedDown; /* pushed down from higher query level? */
} RowMarkClause;
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 41c5e92034..0b8b1076bb 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -752,7 +752,7 @@ typedef struct Limit
* RowMarkType -
* enums for types of row-marking operations
*
- * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we have to uniquely
+ * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we have to uniquely
* identify all the source rows, not only those from the target relations, so
* that we can perform EvalPlanQual rechecking at need. For plain tables we
* can just fetch the TID, the same as for a target relation. Otherwise (for
@@ -763,20 +763,22 @@ typedef struct Limit
typedef enum RowMarkType
{
ROW_MARK_EXCLUSIVE, /* obtain exclusive tuple lock */
+ ROW_MARK_NOKEYEXCLUSIVE, /* obtain no-key exclusive tuple lock */
ROW_MARK_SHARE, /* obtain shared tuple lock */
+ ROW_MARK_KEYSHARE, /* obtain keyshare tuple lock */
ROW_MARK_REFERENCE, /* just fetch the TID */
ROW_MARK_COPY /* physically copy the row value */
} RowMarkType;
-#define RowMarkRequiresRowShareLock(marktype) ((marktype) <= ROW_MARK_SHARE)
+#define RowMarkRequiresRowShareLock(marktype) ((marktype) <= ROW_MARK_KEYSHARE)
/*
* PlanRowMark -
- * plan-time representation of FOR UPDATE/SHARE clauses
+ * plan-time representation of FOR [KEY] UPDATE/SHARE clauses
*
- * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we create a separate
+ * When doing UPDATE, DELETE, or SELECT FOR [KEY] UPDATE/SHARE, we create a separate
* PlanRowMark node for each non-target relation in the query. Relations that
- * are not specified as FOR UPDATE/SHARE are marked ROW_MARK_REFERENCE (if
+ * are not specified as FOR [KEY] UPDATE/SHARE are marked ROW_MARK_REFERENCE (if
* real tables) or ROW_MARK_COPY (if not).
*
* Initially all PlanRowMarks have rti == prti and isParent == false.
diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h
index fc45153f36..2f988d4021 100644
--- a/src/include/parser/analyze.h
+++ b/src/include/parser/analyze.h
@@ -38,6 +38,6 @@ extern bool analyze_requires_snapshot(Node *parseTree);
extern void CheckSelectLocking(Query *qry);
extern void applyLockingClause(Query *qry, Index rtindex,
- bool forUpdate, bool noWait, bool pushedDown);
+ LockClauseStrength strength, bool noWait, bool pushedDown);
#endif /* ANALYZE_H */
diff --git a/src/include/postgres.h b/src/include/postgres.h
index b6e922f358..8ff107a7b3 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -457,6 +457,13 @@ typedef Datum *DatumPtr;
#define TransactionIdGetDatum(X) ((Datum) SET_4_BYTES((X)))
/*
+ * MultiXactIdGetDatum
+ * Returns datum representation for a multixact identifier.
+ */
+
+#define MultiXactIdGetDatum(X) ((Datum) SET_4_BYTES((X)))
+
+/*
* DatumGetCommandId
* Returns command identifier value of a datum.
*/
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index c8974c9ac5..f10c8f194f 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -478,6 +478,7 @@ typedef enum
extern void InitLocks(void);
extern LockMethod GetLocksMethodTable(const LOCK *lock);
extern uint32 LockTagHashCode(const LOCKTAG *locktag);
+extern bool DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2);
extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
LOCKMODE lockmode,
bool sessionLock,
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 61d6aef2ed..ad4d68cd50 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -1134,6 +1134,9 @@ extern Datum ginarrayconsistent(PG_FUNCTION_ARGS);
/* access/transam/twophase.c */
extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
+/* access/transam/multixact.c */
+extern Datum pg_get_multixact_members(PG_FUNCTION_ARGS);
+
/* catalogs/dependency.c */
extern Datum pg_describe_object(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index bde5f1738e..c342eaa66f 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -114,6 +114,7 @@ typedef struct RelationData
Oid rd_id; /* relation's object id */
List *rd_indexlist; /* list of OIDs of indexes on relation */
Bitmapset *rd_indexattr; /* identifies columns used in indexes */
+ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */
Oid rd_oidindex; /* OID of unique index on OID, if any */
LockInfoData rd_lockInfo; /* lock mgr's info for locking relation */
RuleLock *rd_rules; /* rewrite rules */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 444fad3460..1ec2683eac 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -41,7 +41,7 @@ extern List *RelationGetIndexList(Relation relation);
extern Oid RelationGetOidIndex(Relation relation);
extern List *RelationGetIndexExpressions(Relation relation);
extern List *RelationGetIndexPredicate(Relation relation);
-extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation);
+extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation, bool keyAttrs);
extern void RelationGetExclusionInfo(Relation indexRelation,
Oid **operators,
Oid **procs,
@@ -77,7 +77,7 @@ extern Relation RelationBuildLocalRelation(const char *relname,
* Routine to manage assignment of new relfilenode to a relation
*/
extern void RelationSetNewRelfilenode(Relation relation,
- TransactionId freezeXid);
+ TransactionId freezeXid, MultiXactId minmulti);
/*
* Routines for flushing/rebuilding relcache entries in various scenarios
diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h
index 72a8ea42e5..465231c758 100644
--- a/src/include/utils/tqual.h
+++ b/src/include/utils/tqual.h
@@ -88,5 +88,6 @@ extern bool HeapTupleIsSurelyDead(HeapTupleHeader tuple,
extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
uint16 infomask, TransactionId xid);
+extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
#endif /* TQUAL_H */