summaryrefslogtreecommitdiff
path: root/src/backend/commands
diff options
context:
space:
mode:
author Alvaro Herrera 2013-01-23 15:04:59 +0000
committer Alvaro Herrera 2013-01-23 15:04:59 +0000
commit 0ac5ad5134f2769ccbaefec73844f8504c4d6182 (patch)
tree d9b0ba4a1b65a52030820efe68a9c937c46aad1f /src/backend/commands
parent f925c79b9f36c54b67053ade5ad225a75b8dc803 (diff)
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each other, in contrast with already existing "SELECT FOR SHARE" and "SELECT FOR UPDATE". UPDATE commands that do not modify the values stored in the columns that are part of the key of the tuple now grab a SELECT FOR NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently with tuple locks of the FOR KEY SHARE variety. Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this means the concurrency improvement applies to them, which is the whole point of this patch. The added tuple lock semantics require some rejiggering of the multixact module, so that the locking level that each transaction is holding can be stored alongside its Xid. Also, multixacts now need to persist across server restarts and crashes, because they can now represent not only tuple locks, but also tuple updates. This means we need more careful tracking of lifetime of pg_multixact SLRU files; since they now persist longer, we require more infrastructure to figure out when they can be removed. pg_upgrade also needs to be careful to copy pg_multixact files over from the old server to the new, or at least part of multixact.c state, depending on the versions of the old and new servers. Tuple time qualification rules (HeapTupleSatisfies routines) need to be careful not to consider tuples with the "is multi" infomask bit set as being only locked; they might need to look up MultiXact values (i.e. possibly do pg_multixact I/O) to find out the Xid that updated a tuple, whereas they previously were assured to only use information readily available from the tuple header. This is considered acceptable, because the extra I/O would involve cases that would previously cause some commands to block waiting for concurrent transactions to finish. 
Another important change is the fact that locking tuples that have previously been updated causes the future versions to be marked as locked, too; this is essential for correctness of foreign key checks. This causes additional WAL-logging, also (there was previously a single WAL record for a locked tuple; now there are as many as there are updated copies of the tuple). With all this in place, contention related to tuples being checked by foreign key rules should be much reduced. As a bonus, the old behavior that a subtransaction grabbing a stronger tuple lock than the parent (sub)transaction held on a given tuple and later aborting caused the weaker lock to be lost, has been fixed. Many new spec files were added for the isolation tester framework, to ensure overall behavior is sane. There's probably room for several more tests. There were several reviewers of this patch; in particular, Noah Misch and Andres Freund spent considerable time on it. Original idea for the patch came from Simon Riggs, after a problem report by Joel Jacobson. Most code is from me, with contributions from Marti Raudsepp, Alexander Shulgin, Noah Misch and Andres Freund. This patch was discussed in several pgsql-hackers threads; the most important start at the following message-ids: AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com 1290721684-sup-3951@alvh.no-ip.org 1294953201-sup-2099@alvh.no-ip.org 1320343602-sup-2290@alvh.no-ip.org 1339690386-sup-8927@alvh.no-ip.org 4FE5FF020200002500048A3D@gw.wicourts.gov 4FEAB90A0200002500048B7D@gw.wicourts.gov
Diffstat (limited to 'src/backend/commands')
-rw-r--r-- src/backend/commands/analyze.c 9
-rw-r--r-- src/backend/commands/cluster.c 37
-rw-r--r-- src/backend/commands/dbcommands.c 15
-rw-r--r-- src/backend/commands/sequence.c 10
-rw-r--r-- src/backend/commands/tablecmds.c 12
-rw-r--r-- src/backend/commands/trigger.c 32
-rw-r--r-- src/backend/commands/vacuum.c 96
-rw-r--r-- src/backend/commands/vacuumlazy.c 24
8 files changed, 180 insertions, 55 deletions
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 7a5eb42424b..d7b17a5aba6 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -16,6 +16,7 @@
#include <math.h>
+#include "access/multixact.h"
#include "access/transam.h"
#include "access/tupconvert.h"
#include "access/tuptoaster.h"
@@ -580,7 +581,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
totalrows,
visibilitymap_count(onerel),
hasindex,
- InvalidTransactionId);
+ InvalidTransactionId,
+ InvalidMultiXactId);
/*
* Same for indexes. Vacuum always scans all indexes, so if we're part of
@@ -600,7 +602,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
totalindexrows,
0,
false,
- InvalidTransactionId);
+ InvalidTransactionId,
+ InvalidMultiXactId);
}
}
@@ -1193,7 +1196,7 @@ acquire_sample_rows(Relation onerel, int elevel,
* right. (Note: this works out properly when the row was
* both inserted and deleted in our xact.)
*/
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(targtuple.t_data)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data)))
deadrows += 1;
else
liverows += 1;
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 238781b6a70..c0cb2f66545 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -17,6 +17,7 @@
*/
#include "postgres.h"
+#include "access/multixact.h"
#include "access/relscan.h"
#include "access/rewriteheap.h"
#include "access/transam.h"
@@ -65,7 +66,8 @@ static void rebuild_relation(Relation OldHeap, Oid indexOid,
int freeze_min_age, int freeze_table_age, bool verbose);
static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
int freeze_min_age, int freeze_table_age, bool verbose,
- bool *pSwapToastByContent, TransactionId *pFreezeXid);
+ bool *pSwapToastByContent, TransactionId *pFreezeXid,
+ MultiXactId *pFreezeMulti);
static List *get_tables_to_cluster(MemoryContext cluster_context);
static void reform_and_rewrite_tuple(HeapTuple tuple,
TupleDesc oldTupDesc, TupleDesc newTupDesc,
@@ -549,6 +551,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
bool is_system_catalog;
bool swap_toast_by_content;
TransactionId frozenXid;
+ MultiXactId frozenMulti;
/* Mark the correct index as clustered */
if (OidIsValid(indexOid))
@@ -566,14 +569,14 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
/* Copy the heap data into the new table in the desired order */
copy_heap_data(OIDNewHeap, tableOid, indexOid,
freeze_min_age, freeze_table_age, verbose,
- &swap_toast_by_content, &frozenXid);
+ &swap_toast_by_content, &frozenXid, &frozenMulti);
/*
* Swap the physical files of the target and transient tables, then
* rebuild the target's indexes and throw away the transient table.
*/
finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
- swap_toast_by_content, false, frozenXid);
+ swap_toast_by_content, false, frozenXid, frozenMulti);
}
@@ -706,7 +709,8 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace)
static void
copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
int freeze_min_age, int freeze_table_age, bool verbose,
- bool *pSwapToastByContent, TransactionId *pFreezeXid)
+ bool *pSwapToastByContent, TransactionId *pFreezeXid,
+ MultiXactId *pFreezeMulti)
{
Relation NewHeap,
OldHeap,
@@ -722,6 +726,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
bool is_system_catalog;
TransactionId OldestXmin;
TransactionId FreezeXid;
+ MultiXactId MultiXactFrzLimit;
RewriteState rwstate;
bool use_sort;
Tuplesortstate *tuplesort;
@@ -822,7 +827,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
*/
vacuum_set_xid_limits(freeze_min_age, freeze_table_age,
OldHeap->rd_rel->relisshared,
- &OldestXmin, &FreezeXid, NULL);
+ &OldestXmin, &FreezeXid, NULL, &MultiXactFrzLimit);
/*
* FreezeXid will become the table's new relfrozenxid, and that mustn't go
@@ -831,14 +836,16 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
FreezeXid = OldHeap->rd_rel->relfrozenxid;
- /* return selected value to caller */
+ /* return selected values to caller */
*pFreezeXid = FreezeXid;
+ *pFreezeMulti = MultiXactFrzLimit;
/* Remember if it's a system catalog */
is_system_catalog = IsSystemRelation(OldHeap);
/* Initialize the rewrite operation */
- rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, use_wal);
+ rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid,
+ MultiXactFrzLimit, use_wal);
/*
* Decide whether to use an indexscan or seqscan-and-optional-sort to scan
@@ -966,9 +973,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
/*
* Similar situation to INSERT_IN_PROGRESS case.
*/
- Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
if (!is_system_catalog &&
- !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple->t_data)))
+ !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
elog(WARNING, "concurrent delete in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as recently dead */
@@ -1097,6 +1103,7 @@ static void
swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
bool swap_toast_by_content,
TransactionId frozenXid,
+ MultiXactId frozenMulti,
Oid *mapped_tables)
{
Relation relRelation;
@@ -1204,11 +1211,13 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
* and then fail to commit the pg_class update.
*/
- /* set rel1's frozen Xid */
+ /* set rel1's frozen Xid and minimum MultiXid */
if (relform1->relkind != RELKIND_INDEX)
{
Assert(TransactionIdIsNormal(frozenXid));
relform1->relfrozenxid = frozenXid;
+ Assert(MultiXactIdIsValid(frozenMulti));
+ relform1->relminmxid = frozenMulti;
}
/* swap size statistics too, since new rel has freshly-updated stats */
@@ -1272,6 +1281,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
target_is_pg_class,
swap_toast_by_content,
frozenXid,
+ frozenMulti,
mapped_tables);
}
else
@@ -1361,6 +1371,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
target_is_pg_class,
swap_toast_by_content,
InvalidTransactionId,
+ InvalidMultiXactId,
mapped_tables);
/* Clean up. */
@@ -1398,7 +1409,8 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
bool is_system_catalog,
bool swap_toast_by_content,
bool check_constraints,
- TransactionId frozenXid)
+ TransactionId frozenXid,
+ MultiXactId frozenMulti)
{
ObjectAddress object;
Oid mapped_tables[4];
@@ -1414,7 +1426,8 @@ finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
*/
swap_relation_files(OIDOldHeap, OIDNewHeap,
(OIDOldHeap == RelationRelationId),
- swap_toast_by_content, frozenXid, mapped_tables);
+ swap_toast_by_content, frozenXid, frozenMulti,
+ mapped_tables);
/*
* If it's a system catalog, queue an sinval message to flush all
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 4ad4b997585..5b06af24a6c 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -80,6 +80,7 @@ static bool get_db_info(const char *name, LOCKMODE lockmode,
Oid *dbIdP, Oid *ownerIdP,
int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
Oid *dbLastSysOidP, TransactionId *dbFrozenXidP,
+ MultiXactId *dbMinMultiP,
Oid *dbTablespace, char **dbCollate, char **dbCtype);
static bool have_createdb_privilege(void);
static void remove_dbtablespaces(Oid db_id);
@@ -104,6 +105,7 @@ createdb(const CreatedbStmt *stmt)
bool src_allowconn;
Oid src_lastsysoid;
TransactionId src_frozenxid;
+ MultiXactId src_minmxid;
Oid src_deftablespace;
volatile Oid dst_deftablespace;
Relation pg_database_rel;
@@ -288,7 +290,7 @@ createdb(const CreatedbStmt *stmt)
if (!get_db_info(dbtemplate, ShareLock,
&src_dboid, &src_owner, &src_encoding,
&src_istemplate, &src_allowconn, &src_lastsysoid,
- &src_frozenxid, &src_deftablespace,
+ &src_frozenxid, &src_minmxid, &src_deftablespace,
&src_collate, &src_ctype))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
@@ -491,6 +493,7 @@ createdb(const CreatedbStmt *stmt)
new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
new_record[Anum_pg_database_datlastsysoid - 1] = ObjectIdGetDatum(src_lastsysoid);
new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid);
+ new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid);
new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace);
/*
@@ -786,7 +789,7 @@ dropdb(const char *dbname, bool missing_ok)
pgdbrel = heap_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
- &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL))
+ &db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
{
if (!missing_ok)
{
@@ -945,7 +948,7 @@ RenameDatabase(const char *oldname, const char *newname)
rel = heap_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL))
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist", oldname)));
@@ -1046,7 +1049,7 @@ movedb(const char *dbname, const char *tblspcname)
pgdbrel = heap_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
- NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL))
+ NULL, NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist", dbname)));
@@ -1599,6 +1602,7 @@ get_db_info(const char *name, LOCKMODE lockmode,
Oid *dbIdP, Oid *ownerIdP,
int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
Oid *dbLastSysOidP, TransactionId *dbFrozenXidP,
+ MultiXactId *dbMinMultiP,
Oid *dbTablespace, char **dbCollate, char **dbCtype)
{
bool result = false;
@@ -1685,6 +1689,9 @@ get_db_info(const char *name, LOCKMODE lockmode,
/* limit of frozen XIDs */
if (dbFrozenXidP)
*dbFrozenXidP = dbform->datfrozenxid;
+ /* limit of frozen Multixacts */
+ if (dbMinMultiP)
+ *dbMinMultiP = dbform->datminmxid;
/* default tablespace for this database */
if (dbTablespace)
*dbTablespace = dbform->dattablespace;
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 1f2546d69ca..de41c8a1c71 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -14,8 +14,9 @@
*/
#include "postgres.h"
-#include "access/transam.h"
#include "access/htup_details.h"
+#include "access/multixact.h"
+#include "access/transam.h"
#include "access/xlogutils.h"
#include "catalog/dependency.h"
#include "catalog/namespace.h"
@@ -282,8 +283,10 @@ ResetSequence(Oid seq_relid)
/*
* Create a new storage file for the sequence. We want to keep the
* sequence's relfrozenxid at 0, since it won't contain any unfrozen XIDs.
+ * Same with relminmxid, since a sequence will never contain multixacts.
*/
- RelationSetNewRelfilenode(seq_rel, InvalidTransactionId);
+ RelationSetNewRelfilenode(seq_rel, InvalidTransactionId,
+ InvalidMultiXactId);
/*
* Insert the modified tuple into the new storage file.
@@ -1110,7 +1113,8 @@ read_seq_tuple(SeqTable elm, Relation rel, Buffer *buf, HeapTuple seqtuple)
* bit update, ie, don't bother to WAL-log it, since we can certainly do
* this again if the update gets lost.
*/
- if (HeapTupleHeaderGetXmax(seqtuple->t_data) != InvalidTransactionId)
+ Assert(!(seqtuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (HeapTupleHeaderGetRawXmax(seqtuple->t_data) != InvalidTransactionId)
{
HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId);
seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index cad83117f95..6bc056bbc33 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -15,7 +15,9 @@
#include "postgres.h"
#include "access/genam.h"
+#include "access/heapam.h"
#include "access/heapam_xlog.h"
+#include "access/multixact.h"
#include "access/reloptions.h"
#include "access/relscan.h"
#include "access/sysattr.h"
@@ -1130,6 +1132,7 @@ ExecuteTruncate(TruncateStmt *stmt)
{
Oid heap_relid;
Oid toast_relid;
+ MultiXactId minmulti;
/*
* This effectively deletes all rows in the table, and may be done
@@ -1139,6 +1142,8 @@ ExecuteTruncate(TruncateStmt *stmt)
*/
CheckTableForSerializableConflictIn(rel);
+ minmulti = GetOldestMultiXactId();
+
/*
* Need the full transaction-safe pushups.
*
@@ -1146,7 +1151,7 @@ ExecuteTruncate(TruncateStmt *stmt)
* as the relfilenode value. The old storage file is scheduled for
* deletion at commit.
*/
- RelationSetNewRelfilenode(rel, RecentXmin);
+ RelationSetNewRelfilenode(rel, RecentXmin, minmulti);
if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
heap_create_init_fork(rel);
@@ -1159,7 +1164,7 @@ ExecuteTruncate(TruncateStmt *stmt)
if (OidIsValid(toast_relid))
{
rel = relation_open(toast_relid, AccessExclusiveLock);
- RelationSetNewRelfilenode(rel, RecentXmin);
+ RelationSetNewRelfilenode(rel, RecentXmin, minmulti);
if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
heap_create_init_fork(rel);
heap_close(rel, NoLock);
@@ -3516,7 +3521,8 @@ ATRewriteTables(List **wqueue, LOCKMODE lockmode)
* interest in letting this code work on system catalogs.
*/
finish_heap_swap(tab->relid, OIDNewHeap,
- false, false, true, RecentXmin);
+ false, false, true, RecentXmin,
+ ReadNextMultiXactId());
}
else
{
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index a719cf24f43..f11a8ec5d42 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -73,6 +73,7 @@ static HeapTuple GetTupleForTrigger(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
ItemPointer tid,
+ LockTupleMode lockmode,
TupleTableSlot **newSlot);
static bool TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
Trigger *trigger, TriggerEvent event,
@@ -2147,7 +2148,7 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
int i;
trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
- &newSlot);
+ LockTupleExclusive, &newSlot);
if (trigtuple == NULL)
return false;
@@ -2201,7 +2202,8 @@ ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
if (trigdesc && trigdesc->trig_delete_after_row)
{
HeapTuple trigtuple = GetTupleForTrigger(estate, NULL, relinfo,
- tupleid, NULL);
+ tupleid, LockTupleExclusive,
+ NULL);
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE,
true, trigtuple, NULL, NIL, NULL);
@@ -2332,10 +2334,24 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
TupleTableSlot *newSlot;
int i;
Bitmapset *modifiedCols;
+ Bitmapset *keyCols;
+ LockTupleMode lockmode;
+
+ /*
+ * Compute lock mode to use. If columns that are part of the key have not
+ * been modified, then we can use a weaker lock, allowing for better
+ * concurrency.
+ */
+ modifiedCols = GetModifiedColumns(relinfo, estate);
+ keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc, true);
+ if (bms_overlap(keyCols, modifiedCols))
+ lockmode = LockTupleExclusive;
+ else
+ lockmode = LockTupleNoKeyExclusive;
/* get a copy of the on-disk tuple we are planning to update */
trigtuple = GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
- &newSlot);
+ lockmode, &newSlot);
if (trigtuple == NULL)
return NULL; /* cancel the update action */
@@ -2357,7 +2373,6 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
newtuple = slottuple;
}
- modifiedCols = GetModifiedColumns(relinfo, estate);
LocTriggerData.type = T_TriggerData;
LocTriggerData.tg_event = TRIGGER_EVENT_UPDATE |
@@ -2426,7 +2441,8 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
if (trigdesc && trigdesc->trig_update_after_row)
{
HeapTuple trigtuple = GetTupleForTrigger(estate, NULL, relinfo,
- tupleid, NULL);
+ tupleid, LockTupleExclusive,
+ NULL);
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE,
true, trigtuple, newtuple, recheckIndexes,
@@ -2565,6 +2581,7 @@ GetTupleForTrigger(EState *estate,
EPQState *epqstate,
ResultRelInfo *relinfo,
ItemPointer tid,
+ LockTupleMode lockmode,
TupleTableSlot **newSlot)
{
Relation relation = relinfo->ri_RelationDesc;
@@ -2589,8 +2606,8 @@ ltrmark:;
tuple.t_self = *tid;
test = heap_lock_tuple(relation, &tuple,
estate->es_output_cid,
- LockTupleExclusive, false /* wait */,
- &buffer, &hufd);
+ lockmode, false /* wait */,
+ false, &buffer, &hufd);
switch (test)
{
case HeapTupleSelfUpdated:
@@ -2630,6 +2647,7 @@ ltrmark:;
epqstate,
relation,
relinfo->ri_RangeTableIndex,
+ lockmode,
&hufd.ctid,
hufd.xmax);
if (!TupIsNull(epqslot))
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 2d3170a2504..a37a54e5b42 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -26,6 +26,7 @@
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
+#include "access/multixact.h"
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/namespace.h"
@@ -63,7 +64,7 @@ static BufferAccessStrategy vac_strategy;
/* non-export function prototypes */
static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
-static void vac_truncate_clog(TransactionId frozenXID);
+static void vac_truncate_clog(TransactionId frozenXID, MultiXactId frozenMulti);
static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
bool for_wraparound);
@@ -379,7 +380,8 @@ vacuum_set_xid_limits(int freeze_min_age,
bool sharedRel,
TransactionId *oldestXmin,
TransactionId *freezeLimit,
- TransactionId *freezeTableLimit)
+ TransactionId *freezeTableLimit,
+ MultiXactId *multiXactFrzLimit)
{
int freezemin;
TransactionId limit;
@@ -463,8 +465,22 @@ vacuum_set_xid_limits(int freeze_min_age,
*freezeTableLimit = limit;
}
-}
+ if (multiXactFrzLimit != NULL)
+ {
+ MultiXactId mxLimit;
+
+ /*
+ * simplistic multixactid freezing: use the same freezing policy as
+ * for Xids
+ */
+ mxLimit = GetOldestMultiXactId() - freezemin;
+ if (mxLimit < FirstMultiXactId)
+ mxLimit = FirstMultiXactId;
+
+ *multiXactFrzLimit = mxLimit;
+ }
+}
/*
* vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
@@ -574,7 +590,8 @@ void
vac_update_relstats(Relation relation,
BlockNumber num_pages, double num_tuples,
BlockNumber num_all_visible_pages,
- bool hasindex, TransactionId frozenxid)
+ bool hasindex, TransactionId frozenxid,
+ MultiXactId minmulti)
{
Oid relid = RelationGetRelid(relation);
Relation rd;
@@ -648,6 +665,14 @@ vac_update_relstats(Relation relation,
dirty = true;
}
+ /* relminmxid must never go backward, either */
+ if (MultiXactIdIsValid(minmulti) &&
+ MultiXactIdPrecedes(pgcform->relminmxid, minmulti))
+ {
+ pgcform->relminmxid = minmulti;
+ dirty = true;
+ }
+
/* If anything changed, write out the tuple. */
if (dirty)
heap_inplace_update(rd, ctup);
@@ -660,8 +685,13 @@ vac_update_relstats(Relation relation,
* vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
*
* Update pg_database's datfrozenxid entry for our database to be the
- * minimum of the pg_class.relfrozenxid values. If we are able to
- * advance pg_database.datfrozenxid, also try to truncate pg_clog.
+ * minimum of the pg_class.relfrozenxid values.
+ *
+ * Similarly, update our datminmxid to be the minimum of the
+ * pg_class.relminmxid values.
+ *
+ * If we are able to advance either pg_database value, also try to
+ * truncate pg_clog and pg_multixact.
*
* We violate transaction semantics here by overwriting the database's
* existing pg_database tuple with the new value. This is reasonably
@@ -678,17 +708,24 @@ vac_update_datfrozenxid(void)
SysScanDesc scan;
HeapTuple classTup;
TransactionId newFrozenXid;
+ MultiXactId newFrozenMulti;
bool dirty = false;
/*
* Initialize the "min" calculation with GetOldestXmin, which is a
* reasonable approximation to the minimum relfrozenxid for not-yet-
* committed pg_class entries for new tables; see AddNewRelationTuple().
- * Se we cannot produce a wrong minimum by starting with this.
+ * So we cannot produce a wrong minimum by starting with this.
*/
newFrozenXid = GetOldestXmin(true, true);
/*
+ * Similarly, initialize the MultiXact "min" with the value that would
+ * be used on pg_class for new tables. See AddNewRelationTuple().
+ */
+ newFrozenMulti = GetOldestMultiXactId();
+
+ /*
* We must seqscan pg_class to find the minimum Xid, because there is no
* index that can help us here.
*/
@@ -710,9 +747,13 @@ vac_update_datfrozenxid(void)
continue;
Assert(TransactionIdIsNormal(classForm->relfrozenxid));
+ Assert(MultiXactIdIsValid(classForm->relminmxid));
if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
newFrozenXid = classForm->relfrozenxid;
+
+ if (MultiXactIdPrecedes(classForm->relminmxid, newFrozenMulti))
+ newFrozenMulti = classForm->relminmxid;
}
/* we're done with pg_class */
@@ -720,6 +761,7 @@ vac_update_datfrozenxid(void)
heap_close(relation, AccessShareLock);
Assert(TransactionIdIsNormal(newFrozenXid));
+ Assert(MultiXactIdIsValid(newFrozenMulti));
/* Now fetch the pg_database tuple we need to update. */
relation = heap_open(DatabaseRelationId, RowExclusiveLock);
@@ -740,6 +782,13 @@ vac_update_datfrozenxid(void)
dirty = true;
}
+ /* ditto */
+ if (MultiXactIdPrecedes(dbform->datminmxid, newFrozenMulti))
+ {
+ dbform->datminmxid = newFrozenMulti;
+ dirty = true;
+ }
+
if (dirty)
heap_inplace_update(relation, tuple);
@@ -752,7 +801,7 @@ vac_update_datfrozenxid(void)
* this action will update that too.
*/
if (dirty || ForceTransactionIdLimitUpdate())
- vac_truncate_clog(newFrozenXid);
+ vac_truncate_clog(newFrozenXid, newFrozenMulti);
}
@@ -771,17 +820,19 @@ vac_update_datfrozenxid(void)
* info is stale.
*/
static void
-vac_truncate_clog(TransactionId frozenXID)
+vac_truncate_clog(TransactionId frozenXID, MultiXactId frozenMulti)
{
TransactionId myXID = GetCurrentTransactionId();
Relation relation;
HeapScanDesc scan;
HeapTuple tuple;
- Oid oldest_datoid;
+ Oid oldestxid_datoid;
+ Oid oldestmulti_datoid;
bool frozenAlreadyWrapped = false;
- /* init oldest_datoid to sync with my frozenXID */
- oldest_datoid = MyDatabaseId;
+ /* init oldest datoids to sync with my frozen values */
+ oldestxid_datoid = MyDatabaseId;
+ oldestmulti_datoid = MyDatabaseId;
/*
* Scan pg_database to compute the minimum datfrozenxid
@@ -804,13 +855,20 @@ vac_truncate_clog(TransactionId frozenXID)
Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
Assert(TransactionIdIsNormal(dbform->datfrozenxid));
+ Assert(MultiXactIdIsValid(dbform->datminmxid));
if (TransactionIdPrecedes(myXID, dbform->datfrozenxid))
frozenAlreadyWrapped = true;
else if (TransactionIdPrecedes(dbform->datfrozenxid, frozenXID))
{
frozenXID = dbform->datfrozenxid;
- oldest_datoid = HeapTupleGetOid(tuple);
+ oldestxid_datoid = HeapTupleGetOid(tuple);
+ }
+
+ if (MultiXactIdPrecedes(dbform->datminmxid, frozenMulti))
+ {
+ frozenMulti = dbform->datminmxid;
+ oldestmulti_datoid = HeapTupleGetOid(tuple);
}
}
@@ -832,14 +890,18 @@ vac_truncate_clog(TransactionId frozenXID)
return;
}
- /* Truncate CLOG to the oldest frozenxid */
+ /* Truncate CLOG and Multi to the oldest computed value */
TruncateCLOG(frozenXID);
+ TruncateMultiXact(frozenMulti);
/*
- * Update the wrap limit for GetNewTransactionId. Note: this function
- * will also signal the postmaster for an(other) autovac cycle if needed.
+ * Update the wrap limit for GetNewTransactionId and creation of new
+ * MultiXactIds. Note: these functions will also signal the postmaster for
+ * an(other) autovac cycle if needed. XXX should we avoid possibly
+ * signalling twice?
*/
- SetTransactionIdLimit(frozenXID, oldest_datoid);
+ SetTransactionIdLimit(frozenXID, oldestxid_datoid);
+ MultiXactAdvanceOldest(frozenMulti, oldestmulti_datoid);
}
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 8eda66364b3..5ec65ea41be 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -41,6 +41,7 @@
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/htup_details.h"
+#include "access/multixact.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "catalog/storage.h"
@@ -124,6 +125,7 @@ static int elevel = -1;
static TransactionId OldestXmin;
static TransactionId FreezeLimit;
+static MultiXactId MultiXactFrzLimit;
static BufferAccessStrategy vac_strategy;
@@ -180,6 +182,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
double new_rel_tuples;
BlockNumber new_rel_allvisible;
TransactionId new_frozen_xid;
+ MultiXactId new_min_multi;
/* measure elapsed time iff autovacuum logging requires it */
if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
@@ -197,7 +200,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
vacuum_set_xid_limits(vacstmt->freeze_min_age, vacstmt->freeze_table_age,
onerel->rd_rel->relisshared,
- &OldestXmin, &FreezeLimit, &freezeTableLimit);
+ &OldestXmin, &FreezeLimit, &freezeTableLimit,
+ &MultiXactFrzLimit);
scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
freezeTableLimit);
@@ -267,12 +271,17 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
new_frozen_xid = InvalidTransactionId;
+ new_min_multi = MultiXactFrzLimit;
+ if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
+ new_min_multi = InvalidMultiXactId;
+
vac_update_relstats(onerel,
new_rel_pages,
new_rel_tuples,
new_rel_allvisible,
vacrelstats->hasindex,
- new_frozen_xid);
+ new_frozen_xid,
+ new_min_multi);
/*
* Report results to the stats collector, too. An early terminated
@@ -839,7 +848,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* Each non-removable tuple must be checked to see if it needs
* freezing. Note we already have exclusive buffer lock.
*/
- if (heap_freeze_tuple(tuple.t_data, FreezeLimit))
+ if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
+ MultiXactFrzLimit))
frozen[nfrozen++] = offnum;
}
} /* scan along page */
@@ -857,7 +867,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
XLogRecPtr recptr;
recptr = log_heap_freeze(onerel, buf, FreezeLimit,
- frozen, nfrozen);
+ MultiXactFrzLimit, frozen, nfrozen);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
}
@@ -1176,7 +1186,8 @@ lazy_check_needs_freeze(Buffer buf)
tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
- if (heap_tuple_needs_freeze(tupleheader, FreezeLimit, buf))
+ if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
+ MultiXactFrzLimit, buf))
return true;
} /* scan along page */
@@ -1253,7 +1264,8 @@ lazy_cleanup_index(Relation indrel,
stats->num_index_tuples,
0,
false,
- InvalidTransactionId);
+ InvalidTransactionId,
+ InvalidMultiXactId);
ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",