* removed.
*/
defaultPartOid =
- get_default_oid_from_partdesc(RelationGetPartitionDesc(parent, false));
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(parent, true));
if (OidIsValid(defaultPartOid))
CacheInvalidateRelcacheByRelid(defaultPartOid);
* then no locks are acquired, but caller must beware of race conditions
* against possible DROPs of child relations.
*
- * include_detached says to include all partitions, even if they're marked
- * detached. Passing it as false means they might or might not be included,
- * depending on the visibility of the pg_inherits row for the active snapshot.
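+ * If a partition's pg_inherits row is marked "detach pending",
+ * *detached_exist (if not null) is set true, otherwise it is set false.
+ * (Exception: when has_subclass() reports false for the parent we return
+ * early, before *detached_exist is written, so callers that read it back
+ * must initialize it to false themselves.)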
+ *
+ * If omit_detached is true and there is an active snapshot (not the same as
+ * the catalog snapshot used to scan pg_inherits!) and a pg_inherits tuple
+ * marked "detach pending" is visible to that snapshot, then that partition is
+ * omitted from the output list. This makes partitions invisible depending on
+ * whether the transaction that marked those partitions as detached appears
+ * committed to the active snapshot.
*/
List *
-find_inheritance_children(Oid parentrelId, bool include_detached,
- LOCKMODE lockmode)
+find_inheritance_children(Oid parentrelId, bool omit_detached,
+ LOCKMODE lockmode, bool *detached_exist)
{
List *list = NIL;
Relation relation;
if (!has_subclass(parentrelId))
return NIL;
+ if (detached_exist)
+ *detached_exist = false;
+
/*
* Scan pg_inherits and build a working array of subclass OIDs.
*/
{
/*
* Cope with partitions concurrently being detached. When we see a
- * partition marked "detach pending", we only include it in the set of
- * visible partitions if caller requested all detached partitions, or
- * if its pg_inherits tuple's xmin is still visible to the active
- * snapshot.
+ * partition marked "detach pending", we omit it from the returned set
+ * of visible partitions if caller requested that and the tuple's xmin
+ * does not appear in progress to the active snapshot. (If there's no
+ * active snapshot set, that means we're not running a user query, so
+ * it's OK to always include detached partitions in that case; if the
+ * xmin is still running to the active snapshot, then the partition
+ * has not been detached yet and so we include it.)
*
- * The reason for this check is that we want to avoid seeing the
+ * The reason for this hack is that we want to avoid seeing the
* partition as alive in RI queries during REPEATABLE READ or
- * SERIALIZABLE transactions. (If there's no active snapshot set,
- * that means we're not running a user query, so it's OK to always
- * include detached partitions in that case.)
+ * SERIALIZABLE transactions: such queries use a different snapshot
+ * than the one used by regular (user) queries.
*/
- if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhdetachpending &&
- !include_detached &&
- ActiveSnapshotSet())
+ if (((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhdetachpending)
{
- TransactionId xmin;
- Snapshot snap;
+ if (detached_exist)
+ *detached_exist = true;
+
+ if (omit_detached && ActiveSnapshotSet())
+ {
+ TransactionId xmin;
+ Snapshot snap;
- xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data);
- snap = GetActiveSnapshot();
+ xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data);
+ snap = GetActiveSnapshot();
- if (!XidInMVCCSnapshot(xmin, snap))
- continue;
+ if (!XidInMVCCSnapshot(xmin, snap))
+ continue;
+ }
}
inhrelid = ((Form_pg_inherits) GETSTRUCT(inheritsTuple))->inhrelid;
ListCell *lc;
/* Get the direct children of this rel */
- currentchildren = find_inheritance_children(currentrel, false,
- lockmode);
+ currentchildren = find_inheritance_children(currentrel, true,
+ lockmode, NULL);
/*
* Add to the queue only those children not already seen. This avoids
*/
if (partitioned && stmt->relation && !stmt->relation->inh)
{
- PartitionDesc pd = RelationGetPartitionDesc(rel, false);
+ PartitionDesc pd = RelationGetPartitionDesc(rel, true);
if (pd->nparts != 0)
flags |= INDEX_CREATE_INVALID;
*
* If we're called internally (no stmt->relation), recurse always.
*/
- partdesc = RelationGetPartitionDesc(rel, false);
+ partdesc = RelationGetPartitionDesc(rel, true);
if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
{
int nparts = partdesc->nparts;
*/
defaultPartOid =
get_default_oid_from_partdesc(RelationGetPartitionDesc(parent,
- false));
+ true));
if (OidIsValid(defaultPartOid))
defaultRel = table_open(defaultPartOid, AccessExclusiveLock);
* expected_parents will only be 0 if we are not already recursing.
*/
if (expected_parents == 0 &&
- find_inheritance_children(myrelid, false, NoLock) != NIL)
+ find_inheritance_children(myrelid, true, NoLock, NULL) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited column \"%s\" must be renamed in child tables too",
else
{
if (expected_parents == 0 &&
- find_inheritance_children(myrelid, false, NoLock) != NIL)
+ find_inheritance_children(myrelid, true, NoLock, NULL) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("inherited constraint \"%s\" must be renamed in child tables too",
*/
if (colDef->identity &&
recurse &&
- find_inheritance_children(myrelid, false, NoLock) != NIL)
+ find_inheritance_children(myrelid, true, NoLock, NULL) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("cannot recursively add identity column to table that has child tables")));
* use find_all_inheritors to do it in one pass.
*/
children =
- find_inheritance_children(RelationGetRelid(rel), false, lockmode);
+ find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL);
/*
* If we are told not to recurse, there had better not be any child
*/
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc partdesc = RelationGetPartitionDesc(rel, false);
+ PartitionDesc partdesc = RelationGetPartitionDesc(rel, true);
Assert(partdesc != NULL);
if (partdesc->nparts > 0 && !recurse && !recursing)
* resulting state can be properly dumped and restored.
*/
if (!recurse &&
- find_inheritance_children(RelationGetRelid(rel), false, lockmode))
+ find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("ALTER TABLE / DROP EXPRESSION must be applied to child tables too")));
* use find_all_inheritors to do it in one pass.
*/
children =
- find_inheritance_children(RelationGetRelid(rel), false, lockmode);
+ find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL);
if (children)
{
* use find_all_inheritors to do it in one pass.
*/
children =
- find_inheritance_children(RelationGetRelid(rel), false, lockmode);
+ find_inheritance_children(RelationGetRelid(rel), true, lockmode, NULL);
/*
* Check if ONLY was specified with ALTER TABLE. If so, allow the
*/
if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc pd = RelationGetPartitionDesc(pkrel, false);
+ PartitionDesc pd = RelationGetPartitionDesc(pkrel, true);
for (int i = 0; i < pd->nparts; i++)
{
}
else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc pd = RelationGetPartitionDesc(rel, false);
+ PartitionDesc pd = RelationGetPartitionDesc(rel, true);
/*
* Recurse to take appropriate action on each partition; either we
* use find_all_inheritors to do it in one pass.
*/
if (!is_no_inherit_constraint)
- children =
- find_inheritance_children(RelationGetRelid(rel), false, lockmode);
+ children = find_inheritance_children(RelationGetRelid(rel), true,
+ lockmode, NULL);
else
children = NIL;
}
}
else if (!recursing &&
- find_inheritance_children(RelationGetRelid(rel), false,
- NoLock) != NIL)
+ find_inheritance_children(RelationGetRelid(rel), true,
+ NoLock, NULL) != NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("type of inherited column \"%s\" must be changed in child tables too",
}
else if (scanrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- PartitionDesc partdesc = RelationGetPartitionDesc(scanrel, false);
+ PartitionDesc partdesc = RelationGetPartitionDesc(scanrel, true);
int i;
for (i = 0; i < partdesc->nparts; i++)
* new partition will change its partition constraint.
*/
defaultPartOid =
- get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, false));
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, true));
if (OidIsValid(defaultPartOid))
LockRelationOid(defaultPartOid, AccessExclusiveLock);
* will change its partition constraint.
*/
defaultPartOid =
- get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, false));
+ get_default_oid_from_partdesc(RelationGetPartitionDesc(rel, true));
if (OidIsValid(defaultPartOid))
{
/*
RelationGetRelationName(partIdx))));
/* Make sure it indexes a partition of the other index's table */
- partDesc = RelationGetPartitionDesc(parentTbl, false);
+ partDesc = RelationGetPartitionDesc(parentTbl, true);
found = false;
for (i = 0; i < partDesc->nparts; i++)
{
* If we found as many inherited indexes as the partitioned table has
* partitions, we're good; update pg_index to set indisvalid.
*/
- if (tuples == RelationGetPartitionDesc(partedTbl, false)->nparts)
+ if (tuples == RelationGetPartitionDesc(partedTbl, true)->nparts)
{
Relation idxRel;
HeapTuple newtup;
*/
if (partition_recurse)
{
- PartitionDesc partdesc = RelationGetPartitionDesc(rel, false);
+ PartitionDesc partdesc = RelationGetPartitionDesc(rel, true);
List *idxs = NIL;
List *childTbls = NIL;
ListCell *l;
ListCell *l;
List *idxs = NIL;
- idxs = find_inheritance_children(indexOid, false,
- ShareRowExclusiveLock);
+ idxs = find_inheritance_children(indexOid, true,
+ ShareRowExclusiveLock, NULL);
foreach(l, idxs)
childTbls = lappend_oid(childTbls,
IndexGetRelation(lfirst_oid(l),
/*
* For data modification, it is better that executor does not include
- * partitions being detached, except in snapshot-isolation mode. This
- * means that a read-committed transaction immediately gets a "no
- * partition for tuple" error when a tuple is inserted into a partition
- * that's being detached concurrently, but a transaction in repeatable-
- * read mode can still use the partition. Note that because partition
- * detach uses ShareLock on the partition (which conflicts with DML),
- * we're certain that the detach won't be able to complete until any
- * inserting transaction is done.
+ * partitions being detached, except when running in snapshot-isolation
+ * mode. This means that a read-committed transaction immediately gets a
+ * "no partition for tuple" error when a tuple is inserted into a
+ * partition that's being detached concurrently, but a transaction in
+ * repeatable-read mode can still use such a partition.
*/
if (estate->es_partition_directory == NULL)
estate->es_partition_directory =
CreatePartitionDirectory(estate->es_query_cxt,
- IsolationUsesXactSnapshot());
+ !IsolationUsesXactSnapshot());
oldcxt = MemoryContextSwitchTo(proute->memcxt);
ListCell *lc;
int i;
- /* Executor must always include detached partitions */
+ /* For data reading, executor always includes detached partitions */
if (estate->es_partition_directory == NULL)
estate->es_partition_directory =
- CreatePartitionDirectory(estate->es_query_cxt, true);
+ CreatePartitionDirectory(estate->es_query_cxt, false);
n_part_hierarchies = list_length(partitionpruneinfo->prune_infos);
Assert(n_part_hierarchies > 0);
if (root->glob->partition_directory == NULL)
{
root->glob->partition_directory =
- CreatePartitionDirectory(CurrentMemoryContext, false);
+ CreatePartitionDirectory(CurrentMemoryContext, true);
}
partdesc = PartitionDirectoryLookup(root->glob->partition_directory,
PartitionBoundSpec *spec, ParseState *pstate)
{
PartitionKey key = RelationGetPartitionKey(parent);
- PartitionDesc partdesc = RelationGetPartitionDesc(parent, true);
+ PartitionDesc partdesc = RelationGetPartitionDesc(parent, false);
PartitionBoundInfo boundinfo = partdesc->boundinfo;
int with = -1;
bool overlap = false;
{
int i;
int ndatums = 0;
- PartitionDesc pdesc = RelationGetPartitionDesc(parent, true); /* XXX correct? */
+ PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
PartitionBoundInfo boundinfo = pdesc->boundinfo;
if (boundinfo)
if (spec->is_default)
{
List *or_expr_args = NIL;
- PartitionDesc pdesc = RelationGetPartitionDesc(parent, true); /* XXX correct? */
+ PartitionDesc pdesc = RelationGetPartitionDesc(parent, false);
Oid *inhoids = pdesc->oids;
int nparts = pdesc->nparts,
i;
{
MemoryContext pdir_mcxt;
HTAB *pdir_hash;
- bool include_detached;
+ bool omit_detached;
} PartitionDirectoryData;
typedef struct PartitionDirectoryEntry
PartitionDesc pd;
} PartitionDirectoryEntry;
-static void RelationBuildPartitionDesc(Relation rel, bool include_detached);
+static PartitionDesc RelationBuildPartitionDesc(Relation rel,
+ bool omit_detached);
/*
* for callers to continue to use that pointer as long as (a) they hold the
* relation open, and (b) they hold a relation lock strong enough to ensure
* that the data doesn't become stale.
+ *
+ * The above applies to partition descriptors that are complete regarding
+ * partitions concurrently being detached. When a descriptor that omits
+ * partitions being detached is requested (and such partitions are present),
+ * said descriptor is not part of relcache and so it isn't freed by
+ * invalidations either. Caller must not use such a descriptor beyond the
+ * current Portal.
*/
PartitionDesc
-RelationGetPartitionDesc(Relation rel, bool include_detached)
+RelationGetPartitionDesc(Relation rel, bool omit_detached)
{
- if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
- return NULL;
+ Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
- if (unlikely(rel->rd_partdesc == NULL ||
- rel->rd_partdesc->includes_detached != include_detached))
- RelationBuildPartitionDesc(rel, include_detached);
+ /*
+ * If relcache has a partition descriptor, use that. However, we can only
+ * do so when we are asked to include all partitions including detached,
+ * or when we know that there are no detached partitions.
+ */
+ if (likely(rel->rd_partdesc &&
+ (!rel->rd_partdesc->detached_exist || !omit_detached)))
+ return rel->rd_partdesc;
- return rel->rd_partdesc;
+ return RelationBuildPartitionDesc(rel, omit_detached);
}
/*
* context the current context except in very brief code sections, out of fear
* that some of our callees allocate memory on their own which would be leaked
* permanently.
+ *
+ * As a special case, partition descriptors that are requested to omit
+ * partitions being detached (and which contain such partitions) are transient
+ * and are not associated with the relcache entry. Such descriptors only last
+ * through the requesting Portal, so we use the corresponding memory context
+ * for them.
*/
-static void
-RelationBuildPartitionDesc(Relation rel, bool include_detached)
+static PartitionDesc
+RelationBuildPartitionDesc(Relation rel, bool omit_detached)
{
PartitionDesc partdesc;
PartitionBoundInfo boundinfo = NULL;
PartitionBoundSpec **boundspecs = NULL;
Oid *oids = NULL;
bool *is_leaf = NULL;
+ bool detached_exist;
ListCell *cell;
int i,
nparts;
* concurrently, whatever this function returns will be accurate as of
* some well-defined point in time.
*/
- inhoids = find_inheritance_children(RelationGetRelid(rel), include_detached,
- NoLock);
+ inhoids = find_inheritance_children(RelationGetRelid(rel), omit_detached,
+ NoLock, &detached_exist);
nparts = list_length(inhoids);
/* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
partdesc = (PartitionDescData *)
MemoryContextAllocZero(new_pdcxt, sizeof(PartitionDescData));
partdesc->nparts = nparts;
+ partdesc->detached_exist = detached_exist;
/* If there are no partitions, the rest of the partdesc can stay zero */
if (nparts > 0)
{
partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
partdesc->oids = (Oid *) palloc(nparts * sizeof(Oid));
partdesc->is_leaf = (bool *) palloc(nparts * sizeof(bool));
- partdesc->includes_detached = include_detached;
/*
* Assign OIDs from the original array into mapped indexes of the
}
/*
- * We have a fully valid partdesc ready to store into the relcache.
- * Reparent it so it has the right lifespan.
+ * We have a fully valid partdesc. Reparent it so that it has the right
+ * lifespan, and if appropriate put it into the relation's relcache entry.
*/
- MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
+ if (omit_detached && detached_exist)
+ {
+ /*
+ * A transient partition descriptor is only good for the current
+ * statement, so make it a child of the current portal's context.
+ */
+ MemoryContextSetParent(new_pdcxt, PortalContext);
+ }
+ else
+ {
+ /*
+ * This partdesc goes into relcache.
+ */
- /*
- * But first, a kluge: if there's an old rd_pdcxt, it contains an old
- * partition descriptor that may still be referenced somewhere. Preserve
- * it, while not leaking it, by reattaching it as a child context of the
- * new rd_pdcxt. Eventually it will get dropped by either RelationClose
- * or RelationClearRelation.
- */
- if (rel->rd_pdcxt != NULL)
- MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
- rel->rd_pdcxt = new_pdcxt;
- rel->rd_partdesc = partdesc;
+ MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
+
+ /*
+ * But first, a kluge: if there's an old rd_pdcxt, it contains an old
+ * partition descriptor that may still be referenced somewhere.
+ * Preserve it, while not leaking it, by reattaching it as a child
+ * context of the new rd_pdcxt. Eventually it will get dropped by
+ * either RelationClose or RelationClearRelation.
+ */
+ if (rel->rd_pdcxt != NULL)
+ MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
+ rel->rd_pdcxt = new_pdcxt;
+
+ /* Store it into relcache */
+ rel->rd_partdesc = partdesc;
+ }
+
+ return partdesc;
}
/*
* Create a new partition directory object.
*/
PartitionDirectory
-CreatePartitionDirectory(MemoryContext mcxt, bool include_detached)
+CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached)
{
MemoryContext oldcontext = MemoryContextSwitchTo(mcxt);
PartitionDirectory pdir;
pdir->pdir_hash = hash_create("partition directory", 256, &ctl,
HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
- pdir->include_detached = include_detached;
+ pdir->omit_detached = omit_detached;
MemoryContextSwitchTo(oldcontext);
return pdir;
*/
RelationIncrementReferenceCount(rel);
pde->rel = rel;
- pde->pd = RelationGetPartitionDesc(rel, pdir->include_detached);
+ pde->pd = RelationGetPartitionDesc(rel, pdir->omit_detached);
Assert(pde->pd != NULL);
}
return pde->pd;
#define InheritsParentIndexId 2187
-extern List *find_inheritance_children(Oid parentrelId, bool include_detached,
- LOCKMODE lockmode);
+extern List *find_inheritance_children(Oid parentrelId, bool omit_detached,
+ LOCKMODE lockmode, bool *detached_exist);
extern List *find_all_inheritors(Oid parentrelId, LOCKMODE lockmode,
List **parents);
extern bool has_subclass(Oid relationId);
/*
* Information about partitions of a partitioned table.
+ *
+ * For partitioned tables where detached partitions exist, we only cache
+ * descriptors that include all partitions, including detached; when a
+ * descriptor without the detached partitions is requested, we create one
+ * afresh each time. (The reason for this is that the set of detached
+ * partitions that are visible to each caller depends on the snapshot it has,
+ * so it's pretty much impossible to evict a descriptor from cache at the
+ * right time.)
*/
typedef struct PartitionDescData
{
int nparts; /* Number of partitions */
- bool includes_detached; /* Does it include detached partitions */
+ bool detached_exist; /* Are there any detached partitions? */
Oid *oids; /* Array of 'nparts' elements containing
* partition OIDs in order of the their bounds */
bool *is_leaf; /* Array of 'nparts' elements storing whether
} PartitionDescData;
-extern PartitionDesc RelationGetPartitionDesc(Relation rel, bool include_detached);
+extern PartitionDesc RelationGetPartitionDesc(Relation rel, bool omit_detached);
-extern PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt, bool include_detached);
+extern PartitionDirectory CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached);
extern PartitionDesc PartitionDirectoryLookup(PartitionDirectory, Relation);
extern void DestroyPartitionDirectory(PartitionDirectory pdir);
1
2
step s1insert: insert into d4_fk values (1);
+ERROR: insert or update on table "d4_fk" violates foreign key constraint "d4_fk_a_fkey"
step s1c: commit;
starting permutation: s2snitch s1b s1s s2detach s1cancel s3vacfreeze s1s s1insert s1c