diff options
| author | Alvaro Herrera | 2021-04-28 19:44:35 +0000 |
|---|---|---|
| committer | Alvaro Herrera | 2021-04-28 19:44:35 +0000 |
| commit | d6b8d29419df0efad57f95c80b871745d1b55da6 (patch) | |
| tree | 04627e39b8fc7d19c55179eb4bdd898d7fc9aa29 /src/backend | |
| parent | c93f8f3b8d3bc780892e2bf11192fbdd136fddfe (diff) | |
Allow a partdesc-omitting-partitions to be cached
Makes partition descriptor acquisition faster during the transient
period in which a partition is in the process of being detached.
This also adds the restriction that only one partition can be in
pending-detach state for a partitioned table.
While at it, return find_inheritance_children() API to what it was
before 71f4c8c6f74b, and create a separate
find_inheritance_children_extended() that returns detailed info about
detached partitions.
(This incidentally fixes a bug in 8aba9322511 whereby a memory context
holding a transient partdesc is reparented to a NULL PortalContext,
leading to permanent leak of that memory. The fix is to no longer rely
on reparenting contexts to PortalContext. Reported by Amit Langote.)
Per gripe from Amit Langote
Discussion: https://postgr.es/m/CA+HiwqFgpP1LxJZOBYGt9rpvTjXXkg5qG2+Xch2Z1Q7KrqZR1A@mail.gmail.com
Diffstat (limited to 'src/backend')
| -rw-r--r-- | src/backend/catalog/pg_inherits.c | 52 | ||||
| -rw-r--r-- | src/backend/commands/tablecmds.c | 66 | ||||
| -rw-r--r-- | src/backend/commands/trigger.c | 3 | ||||
| -rw-r--r-- | src/backend/partitioning/partdesc.c | 101 | ||||
| -rw-r--r-- | src/backend/utils/cache/relcache.c | 33 |
5 files changed, 186 insertions, 69 deletions
diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index 6447b528546..c373faf2d64 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -52,6 +52,22 @@ typedef struct SeenRelsEntry * then no locks are acquired, but caller must beware of race conditions * against possible DROPs of child relations. * + * Partitions marked as being detached are omitted; see + * find_inheritance_children_extended for details. + */ +List * +find_inheritance_children(Oid parentrelId, LOCKMODE lockmode) +{ + return find_inheritance_children_extended(parentrelId, true, lockmode, + NULL, NULL); +} + +/* + * find_inheritance_children_extended + * + * As find_inheritance_children, with more options regarding detached + * partitions. + * * If a partition's pg_inherits row is marked "detach pending", * *detached_exist (if not null) is set true. * @@ -60,11 +76,13 @@ typedef struct SeenRelsEntry * marked "detach pending" is visible to that snapshot, then that partition is * omitted from the output list. This makes partitions invisible depending on * whether the transaction that marked those partitions as detached appears - * committed to the active snapshot. + * committed to the active snapshot. In addition, *detached_xmin (if not null) + * is set to the xmin of the row of the detached partition. */ List * -find_inheritance_children(Oid parentrelId, bool omit_detached, - LOCKMODE lockmode, bool *detached_exist) +find_inheritance_children_extended(Oid parentrelId, bool omit_detached, + LOCKMODE lockmode, bool *detached_exist, + TransactionId *detached_xmin) { List *list = NIL; Relation relation; @@ -132,7 +150,32 @@ find_inheritance_children(Oid parentrelId, bool omit_detached, snap = GetActiveSnapshot(); if (!XidInMVCCSnapshot(xmin, snap)) + { + if (detached_xmin) + { + /* + * Two detached partitions should not occur (see + * checks in MarkInheritDetached), but if they do, + * track the newer of the two. Make sure to warn the + * user, so that they can clean up. Since this is + * just a cross-check against potentially corrupt + * catalogs, we don't make it a full-fledged error + * message. + */ + if (*detached_xmin != InvalidTransactionId) + { + elog(WARNING, "more than one partition pending detach found for table with OID %u", + parentrelId); + if (TransactionIdFollows(xmin, *detached_xmin)) + *detached_xmin = xmin; + } + else + *detached_xmin = xmin; + } + + /* Don't add the partition to the output list */ continue; + } } } @@ -247,8 +290,7 @@ find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents) ListCell *lc; /* Get the direct children of this rel */ - currentchildren = find_inheritance_children(currentrel, true, - lockmode, NULL); + currentchildren = find_inheritance_children(currentrel, lockmode); /* * Add to the queue only those children not already seen. This avoids diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 8e717ada28d..d9ba87a2a3a 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -3492,7 +3492,7 @@ renameatt_internal(Oid myrelid, * expected_parents will only be 0 if we are not already recursing. */ if (expected_parents == 0 && - find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) + find_inheritance_children(myrelid, NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("inherited column \"%s\" must be renamed in child tables too", @@ -3691,7 +3691,7 @@ rename_constraint_internal(Oid myrelid, else { if (expected_parents == 0 && - find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) + find_inheritance_children(myrelid, NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("inherited constraint \"%s\" must be renamed in child tables too", @@ -6565,7 +6565,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, */ if (colDef->identity && recurse && - find_inheritance_children(myrelid, true, NoLock, NULL) != NIL) + find_inheritance_children(myrelid, NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("cannot recursively add identity column to table that has child tables"))); @@ -6811,7 +6811,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, * use find_all_inheritors to do it in one pass. */ children = - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); + find_inheritance_children(RelationGetRelid(rel), lockmode); /* * If we are told not to recurse, there had better not be any child @@ -7674,7 +7674,7 @@ ATPrepDropExpression(Relation rel, AlterTableCmd *cmd, bool recurse, bool recurs * resulting state can be properly dumped and restored. */ if (!recurse && - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL)) + find_inheritance_children(RelationGetRelid(rel), lockmode)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too"))); @@ -8282,7 +8282,7 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName, * use find_all_inheritors to do it in one pass. */ children = - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); + find_inheritance_children(RelationGetRelid(rel), lockmode); if (children) { @@ -8770,7 +8770,7 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, * use find_all_inheritors to do it in one pass. */ children = - find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL); + find_inheritance_children(RelationGetRelid(rel), lockmode); /* * Check if ONLY was specified with ALTER TABLE. If so, allow the @@ -11303,8 +11303,7 @@ ATExecDropConstraint(Relation rel, const char *constrName, * use find_all_inheritors to do it in one pass. */ if (!is_no_inherit_constraint) - children = find_inheritance_children(RelationGetRelid(rel), true, - lockmode, NULL); + children = find_inheritance_children(RelationGetRelid(rel), lockmode); else children = NIL; @@ -11688,8 +11687,7 @@ ATPrepAlterColumnType(List **wqueue, } } else if (!recursing && - find_inheritance_children(RelationGetRelid(rel), true, - NoLock, NULL) != NIL) + find_inheritance_children(RelationGetRelid(rel), NoLock) != NIL) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("type of inherited column \"%s\" must be changed in child tables too", @@ -14601,7 +14599,8 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode) * MarkInheritDetached * * Set inhdetachpending for a partition, for ATExecDetachPartition - * in concurrent mode. + * in concurrent mode. While at it, verify that no other partition is + * already pending detach. */ static void MarkInheritDetached(Relation child_rel, Relation parent_rel) @@ -14617,32 +14616,45 @@ MarkInheritDetached(Relation child_rel, Relation parent_rel) Assert(parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); /* - * Find pg_inherits entries by inhrelid. + * Find pg_inherits entries by inhparent. (We need to scan them all in + * order to verify that no other partition is pending detach.) */ catalogRelation = table_open(InheritsRelationId, RowExclusiveLock); ScanKeyInit(&key, - Anum_pg_inherits_inhrelid, + Anum_pg_inherits_inhparent, BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(RelationGetRelid(child_rel))); - scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, + ObjectIdGetDatum(RelationGetRelid(parent_rel))); + scan = systable_beginscan(catalogRelation, InheritsParentIndexId, true, NULL, 1, &key); while (HeapTupleIsValid(inheritsTuple = systable_getnext(scan))) { - HeapTuple newtup; + Form_pg_inherits inhForm; - if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhparent != - RelationGetRelid(parent_rel)) - elog(ERROR, "bad parent tuple found for partition %u", - RelationGetRelid(child_rel)); + inhForm = (Form_pg_inherits) GETSTRUCT(inheritsTuple); + if (inhForm->inhdetachpending) + ereport(ERROR, + errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("partition \"%s\" already pending detach in partitioned table \"%s.%s\"", + get_rel_name(inhForm->inhrelid), + get_namespace_name(parent_rel->rd_rel->relnamespace), + RelationGetRelationName(parent_rel)), + errhint("Use ALTER TABLE ... DETACH PARTITION ... FINALIZE to complete the detach operation.")); + + if (inhForm->inhrelid == RelationGetRelid(child_rel)) + { + HeapTuple newtup; - newtup = heap_copytuple(inheritsTuple); - ((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true; + newtup = heap_copytuple(inheritsTuple); + ((Form_pg_inherits) GETSTRUCT(newtup))->inhdetachpending = true; - CatalogTupleUpdate(catalogRelation, - &inheritsTuple->t_self, - newtup); - found = true; + CatalogTupleUpdate(catalogRelation, + &inheritsTuple->t_self, + newtup); + found = true; + heap_freetuple(newtup); + /* keep looking, to ensure we catch others pending detach */ + } } /* Done */ diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index d8393aa4de3..f305f8bc0f2 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -1141,8 +1141,7 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString, ListCell *l; List *idxs = NIL; - idxs = find_inheritance_children(indexOid, true, - ShareRowExclusiveLock, NULL); + idxs = find_inheritance_children(indexOid, ShareRowExclusiveLock); foreach(l, idxs) childTbls = lappend_oid(childTbls, IndexGetRelation(lfirst_oid(l), diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c index 2305dff4077..fa9729283a0 100644 --- a/src/backend/partitioning/partdesc.c +++ b/src/backend/partitioning/partdesc.c @@ -54,6 +54,12 @@ static PartitionDesc RelationBuildPartitionDesc(Relation rel, /* * RelationGetPartitionDesc -- get partition descriptor, if relation is partitioned * + * We keep two partdescs in relcache: rd_partdesc includes all partitions + * (even those being concurrently marked detached), while rd_partdesc_nodetach + * omits (some of) those. We store the pg_inherits.xmin value for the latter, + * to determine whether it can be validly reused in each case, since that + * depends on the active snapshot. + * * Note: we arrange for partition descriptors to not get freed until the * relcache entry's refcount goes to zero (see hacks in RelationClose, * RelationClearRelation, and RelationBuildPartitionDesc). Therefore, even @@ -61,13 +67,6 @@ static PartitionDesc RelationBuildPartitionDesc(Relation rel, * for callers to continue to use that pointer as long as (a) they hold the * relation open, and (b) they hold a relation lock strong enough to ensure * that the data doesn't become stale. - * - * The above applies to partition descriptors that are complete regarding - * partitions concurrently being detached. When a descriptor that omits - * partitions being detached is requested (and such partitions are present), - * said descriptor is not part of relcache and so it isn't freed by - * invalidations either. Caller must not use such a descriptor beyond the - * current Portal. */ PartitionDesc RelationGetPartitionDesc(Relation rel, bool omit_detached) @@ -78,11 +77,36 @@ RelationGetPartitionDesc(Relation rel, bool omit_detached) * If relcache has a partition descriptor, use that. However, we can only * do so when we are asked to include all partitions including detached; * and also when we know that there are no detached partitions. + * + * If there is no active snapshot, detached partitions aren't omitted + * either, so we can use the cached descriptor too in that case. */ if (likely(rel->rd_partdesc && - (!rel->rd_partdesc->detached_exist || !omit_detached))) + (!rel->rd_partdesc->detached_exist || !omit_detached || + !ActiveSnapshotSet()))) return rel->rd_partdesc; + /* + * If we're asked to omit detached partitions, we may be able to use a + * cached descriptor too. We determine that based on the pg_inherits.xmin + * that was saved alongside that descriptor: if the xmin that was not in + * progress for that active snapshot is also not in progress for the + * current active snapshot, then we can use use it. Otherwise build one + * from scratch. + */ + if (omit_detached && + rel->rd_partdesc_nodetached && + TransactionIdIsValid(rel->rd_partdesc_nodetached_xmin) && + ActiveSnapshotSet()) + { + Snapshot activesnap; + + activesnap = GetActiveSnapshot(); + + if (!XidInMVCCSnapshot(rel->rd_partdesc_nodetached_xmin, activesnap)) + return rel->rd_partdesc_nodetached; + } + return RelationBuildPartitionDesc(rel, omit_detached); } @@ -117,6 +141,8 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached) Oid *oids = NULL; bool *is_leaf = NULL; bool detached_exist; + bool is_omit; + TransactionId detached_xmin; ListCell *cell; int i, nparts; @@ -132,8 +158,11 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached) * some well-defined point in time. */ detached_exist = false; - inhoids = find_inheritance_children(RelationGetRelid(rel), omit_detached, - NoLock, &detached_exist); + detached_xmin = InvalidTransactionId; + inhoids = find_inheritance_children_extended(RelationGetRelid(rel), + omit_detached, NoLock, + &detached_exist, + &detached_xmin); nparts = list_length(inhoids); /* Allocate working arrays for OIDs, leaf flags, and boundspecs. */ @@ -281,37 +310,49 @@ RelationBuildPartitionDesc(Relation rel, bool omit_detached) } /* + * Are we working with the partition that omits detached partitions, or + * the one that includes them? + */ + is_omit = omit_detached && detached_exist && ActiveSnapshotSet(); + + /* * We have a fully valid partdesc. Reparent it so that it has the right - * lifespan, and if appropriate put it into the relation's relcache entry. + * lifespan. */ - if (omit_detached && detached_exist) + MemoryContextSetParent(new_pdcxt, CacheMemoryContext); + + /* + * Store it into relcache. + * + * But first, a kluge: if there's an old context for this type of + * descriptor, it contains an old partition descriptor that may still be + * referenced somewhere. Preserve it, while not leaking it, by + * reattaching it as a child context of the new one. Eventually it will + * get dropped by either RelationClose or RelationClearRelation. + * (We keep the regular partdesc in rd_pdcxt, and the partdesc-excluding- + * detached-partitions in rd_pddcxt.) + */ + if (is_omit) { + if (rel->rd_pddcxt != NULL) + MemoryContextSetParent(rel->rd_pddcxt, new_pdcxt); + rel->rd_pddcxt = new_pdcxt; + rel->rd_partdesc_nodetached = partdesc; + /* - * A transient partition descriptor is only good for the current - * statement, so make it a child of the current portal's context. + * For partdescs built excluding detached partitions, which we save + * separately, we also record the pg_inherits.xmin of the detached + * partition that was omitted; this informs a future potential user of + * such a cached partdescs. (This might be InvalidXid; see comments + * in struct RelationData). */ - MemoryContextSetParent(new_pdcxt, PortalContext); + rel->rd_partdesc_nodetached_xmin = detached_xmin; } else { - /* - * This partdesc goes into relcache. - */ - - MemoryContextSetParent(new_pdcxt, CacheMemoryContext); - - /* - * But first, a kluge: if there's an old rd_pdcxt, it contains an old - * partition descriptor that may still be referenced somewhere. - * Preserve it, while not leaking it, by reattaching it as a child - * context of the new rd_pdcxt. Eventually it will get dropped by - * either RelationClose or RelationClearRelation. - */ if (rel->rd_pdcxt != NULL) MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt); rel->rd_pdcxt = new_pdcxt; - - /* Store it into relcache */ rel->rd_partdesc = partdesc; } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 466c28d5287..31c8e07f2a7 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1157,7 +1157,10 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) relation->rd_partkey = NULL; relation->rd_partkeycxt = NULL; relation->rd_partdesc = NULL; + relation->rd_partdesc_nodetached = NULL; + relation->rd_partdesc_nodetached_xmin = InvalidTransactionId; relation->rd_pdcxt = NULL; + relation->rd_pddcxt = NULL; relation->rd_partcheck = NIL; relation->rd_partcheckvalid = false; relation->rd_partcheckcxt = NULL; @@ -2103,10 +2106,16 @@ RelationClose(Relation relation) * stale partition descriptors it has. This is unlikely, so check to see * if there are child contexts before expending a call to mcxt.c. */ - if (RelationHasReferenceCountZero(relation) && - relation->rd_pdcxt != NULL && - relation->rd_pdcxt->firstchild != NULL) - MemoryContextDeleteChildren(relation->rd_pdcxt); + if (RelationHasReferenceCountZero(relation)) + { + if (relation->rd_pdcxt != NULL && + relation->rd_pdcxt->firstchild != NULL) + MemoryContextDeleteChildren(relation->rd_pdcxt); + + if (relation->rd_pddcxt != NULL && + relation->rd_pddcxt->firstchild != NULL) + MemoryContextDeleteChildren(relation->rd_pddcxt); + } #ifdef RELCACHE_FORCE_RELEASE if (RelationHasReferenceCountZero(relation) && @@ -2390,6 +2399,8 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) MemoryContextDelete(relation->rd_partkeycxt); if (relation->rd_pdcxt) MemoryContextDelete(relation->rd_pdcxt); + if (relation->rd_pddcxt) + MemoryContextDelete(relation->rd_pddcxt); if (relation->rd_partcheckcxt) MemoryContextDelete(relation->rd_partcheckcxt); pfree(relation); @@ -2644,7 +2655,7 @@ RelationClearRelation(Relation relation, bool rebuild) SWAPFIELD(PartitionKey, rd_partkey); SWAPFIELD(MemoryContext, rd_partkeycxt); } - if (newrel->rd_pdcxt != NULL) + if (newrel->rd_pdcxt != NULL || newrel->rd_pddcxt != NULL) { /* * We are rebuilding a partitioned relation with a non-zero @@ -2672,13 +2683,22 @@ RelationClearRelation(Relation relation, bool rebuild) * newrel. */ relation->rd_partdesc = NULL; /* ensure rd_partdesc is invalid */ + relation->rd_partdesc_nodetached = NULL; + relation->rd_partdesc_nodetached_xmin = InvalidTransactionId; if (relation->rd_pdcxt != NULL) /* probably never happens */ MemoryContextSetParent(newrel->rd_pdcxt, relation->rd_pdcxt); else relation->rd_pdcxt = newrel->rd_pdcxt; + if (relation->rd_pddcxt != NULL) + MemoryContextSetParent(newrel->rd_pddcxt, relation->rd_pddcxt); + else + relation->rd_pddcxt = newrel->rd_pddcxt; /* drop newrel's pointers so we don't destroy it below */ newrel->rd_partdesc = NULL; + newrel->rd_partdesc_nodetached = NULL; + newrel->rd_partdesc_nodetached_xmin = InvalidTransactionId; newrel->rd_pdcxt = NULL; + newrel->rd_pddcxt = NULL; } #undef SWAPFIELD @@ -6017,7 +6037,10 @@ load_relcache_init_file(bool shared) rel->rd_partkey = NULL; rel->rd_partkeycxt = NULL; rel->rd_partdesc = NULL; + rel->rd_partdesc_nodetached = NULL; + rel->rd_partdesc_nodetached_xmin = InvalidTransactionId; rel->rd_pdcxt = NULL; + rel->rd_pddcxt = NULL; rel->rd_partcheck = NIL; rel->rd_partcheckvalid = false; rel->rd_partcheckcxt = NULL; |
