#include "catalog/index.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
#include "catalog/pg_am.h"
+#include "catalog/pg_inherits.h"
#include "catalog/toasting.h"
#include "commands/cluster.h"
#include "commands/defrem.h"
} RelToCluster;
+static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
bool verbose, bool *pSwapToastByContent,
TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
static List *get_tables_to_cluster(MemoryContext cluster_context);
+static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
+ Oid indexOid);
/*---------------------------------------------------------------------------
ListCell *lc;
ClusterParams params = {0};
bool verbose = false;
+ Relation rel = NULL;
+ Oid indexOid = InvalidOid;
+ MemoryContext cluster_context;
+ List *rtcs;
/* Parse option list */
foreach(lc, stmt->params)
if (stmt->relation != NULL)
{
/* This is the single-relation case. */
- Oid tableOid,
- indexOid = InvalidOid;
- Relation rel;
+ Oid tableOid;
- /* Find, lock, and check permissions on the table */
+ /*
+ * Find, lock, and check permissions on the table. We obtain
+ * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
+ * single-transaction case.
+ */
tableOid = RangeVarGetRelidExtended(stmt->relation,
AccessExclusiveLock,
0,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot cluster temporary tables of other sessions")));
- /*
- * Reject clustering a partitioned table.
- */
- if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("cannot cluster a partitioned table")));
-
if (stmt->indexname == NULL)
{
ListCell *index;
stmt->indexname, stmt->relation->relname)));
}
- /* close relation, keep lock till commit */
- table_close(rel, NoLock);
+ if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ {
+ /* close relation, keep lock till commit */
+ table_close(rel, NoLock);
- /* Do the job. */
- cluster_rel(tableOid, indexOid, ¶ms);
+ /* Do the job. */
+ cluster_rel(tableOid, indexOid, ¶ms);
+
+ return;
+ }
+ }
+
+ /*
+ * By here, we know we are in a multi-table situation. In order to avoid
+ * holding locks for too long, we want to process each table in its own
+ * transaction. This forces us to disallow running inside a user
+ * transaction block.
+ */
+ PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+ /* Also, we need a memory context to hold our list of relations */
+ cluster_context = AllocSetContextCreate(PortalContext,
+ "Cluster",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /*
+ * Either we're processing a partitioned table, or we were not given any
+ * table name at all. In either case, obtain a list of relations to
+ * process.
+ *
+ * In the former case, an index name must have been given, so we don't
+ * need to recheck its "indisclustered" bit, but we have to check that it
+ * is an index that we can cluster on. In the latter case, we set the
+ * option bit to have indisclustered verified.
+ *
+ * Rechecking the relation itself is necessary here in all cases.
+ */
+ params.options |= CLUOPT_RECHECK;
+ if (rel != NULL)
+ {
+ Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+ check_index_is_clusterable(rel, indexOid, true, AccessShareLock);
+ rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+
+ /* close relation, releasing lock on parent table */
+ table_close(rel, AccessExclusiveLock);
}
else
{
- /*
- * This is the "multi relation" case. We need to cluster all tables
- * that have some index with indisclustered set.
- */
- MemoryContext cluster_context;
- List *rvs;
- ListCell *rv;
+ rtcs = get_tables_to_cluster(cluster_context);
+ params.options |= CLUOPT_RECHECK_ISCLUSTERED;
+ }
- /*
- * We cannot run this form of CLUSTER inside a user transaction block;
- * we'd be holding locks way too long.
- */
- PreventInTransactionBlock(isTopLevel, "CLUSTER");
+ /* Do the job. */
+ cluster_multiple_rels(rtcs, ¶ms);
- /*
- * Create special memory context for cross-transaction storage.
- *
- * Since it is a child of PortalContext, it will go away even in case
- * of error.
- */
- cluster_context = AllocSetContextCreate(PortalContext,
- "Cluster",
- ALLOCSET_DEFAULT_SIZES);
+ /* Start a new transaction for the cleanup work. */
+ StartTransactionCommand();
- /*
- * Build the list of relations to cluster. Note that this lives in
- * cluster_context.
- */
- rvs = get_tables_to_cluster(cluster_context);
+ /* Clean up working storage */
+ MemoryContextDelete(cluster_context);
+}
- /* Commit to get out of starting transaction */
- PopActiveSnapshot();
- CommitTransactionCommand();
+/*
+ * Given a list of relations to cluster, process each of them in a separate
+ * transaction.
+ *
+ * We expect to be in a transaction at start, but there isn't one when we
+ * return.
+ */
+static void
+cluster_multiple_rels(List *rtcs, ClusterParams *params)
+{
+ ListCell *lc;
- /* Ok, now that we've got them all, cluster them one by one */
- foreach(rv, rvs)
- {
- RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
- ClusterParams cluster_params = params;
+ /* Commit to get out of starting transaction */
+ PopActiveSnapshot();
+ CommitTransactionCommand();
- /* Start a new transaction for each relation. */
- StartTransactionCommand();
- /* functions in indexes may want a snapshot set */
- PushActiveSnapshot(GetTransactionSnapshot());
- /* Do the job. */
- cluster_params.options |= CLUOPT_RECHECK;
- cluster_rel(rvtc->tableOid, rvtc->indexOid,
- &cluster_params);
- PopActiveSnapshot();
- CommitTransactionCommand();
- }
+ /* Cluster the tables, each in a separate transaction */
+ foreach(lc, rtcs)
+ {
+ RelToCluster *rtc = (RelToCluster *) lfirst(lc);
- /* Start a new transaction for the cleanup work. */
+ /* Start a new transaction for each relation. */
StartTransactionCommand();
- /* Clean up working storage */
- MemoryContextDelete(cluster_context);
+ /* functions in indexes may want a snapshot set */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ /* Do the job. */
+ cluster_rel(rtc->tableOid, rtc->indexOid, params);
+
+ PopActiveSnapshot();
+ CommitTransactionCommand();
}
}
/*
* Silently skip a temp table for a remote session. Only doing this
* check in the "recheck" case is appropriate (which currently means
- * somebody is executing a database-wide CLUSTER), because there is
- * another check in cluster() which will stop any attempt to cluster
- * remote temp tables by name. There is another check in cluster_rel
- * which is redundant, but we leave it for extra safety.
+ * somebody is executing a database-wide CLUSTER or on a partitioned
+ * table), because there is another check in cluster() which will stop
+ * any attempt to cluster remote temp tables by name. There is
+ * another check in cluster_rel which is redundant, but we leave it
+ * for extra safety.
*/
if (RELATION_IS_OTHER_TEMP(OldHeap))
{
}
/*
- * Check that the index is still the one with indisclustered set.
+ * Check that the index is still the one with indisclustered set,
+ * if needed.
*/
- if (!get_index_isclustered(indexOid))
+ if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
+ !get_index_isclustered(indexOid))
{
relation_close(OldHeap, AccessExclusiveLock);
pgstat_progress_end_command();
return;
}
+ Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
+ OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
+ OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
+
/*
* All predicate locks on the tuples or pages are about to be made
* invalid, because we move tuples around. Promote them to relation
TransactionId frozenXid;
MultiXactId cutoffMulti;
- /* Mark the correct index as clustered */
if (OidIsValid(indexOid))
+ /* Mark the correct index as clustered */
mark_index_clustered(OldHeap, indexOid, true);
/* Remember info about rel before closing OldHeap */
/*
* Reset the relrewrite for the toast. The command-counter
- * increment is required here as we are about to update
- * the tuple that is updated as part of RenameRelationInternal.
+ * increment is required here as we are about to update the tuple
+ * that is updated as part of RenameRelationInternal.
*/
CommandCounterIncrement();
ResetRelRewrite(newrel->rd_rel->reltoastrelid);
HeapTuple indexTuple;
Form_pg_index index;
MemoryContext old_context;
- RelToCluster *rvtc;
- List *rvs = NIL;
+ List *rtcs = NIL;
/*
* Get all indexes that have indisclustered set and are owned by
scan = table_beginscan_catalog(indRelation, 1, &entry);
while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
+ RelToCluster *rtc;
+
index = (Form_pg_index) GETSTRUCT(indexTuple);
if (!pg_class_ownercheck(index->indrelid, GetUserId()))
continue;
- /*
- * We have to build the list in a different memory context so it will
- * survive the cross-transaction processing
- */
+ /* Use a permanent memory context for the result list */
old_context = MemoryContextSwitchTo(cluster_context);
- rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
- rvtc->tableOid = index->indrelid;
- rvtc->indexOid = index->indexrelid;
- rvs = lappend(rvs, rvtc);
+ rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+ rtc->tableOid = index->indrelid;
+ rtc->indexOid = index->indexrelid;
+ rtcs = lappend(rtcs, rtc);
MemoryContextSwitchTo(old_context);
}
relation_close(indRelation, AccessShareLock);
- return rvs;
+ return rtcs;
+}
+
+/*
+ * Given an index on a partitioned table, return a list of RelToCluster for
+ * all the children leaves tables/indexes.
+ *
+ * Caller must hold lock on the table containing the index.
+ *
+ * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
+ * on the table already.
+ */
+static List *
+get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
+{
+ List *inhoids;
+ ListCell *lc;
+ List *rtcs = NIL;
+ MemoryContext old_context;
+
+ /* Do not lock the children until they're processed */
+ inhoids = find_all_inheritors(indexOid, NoLock, NULL);
+
+ /* Use a permanent memory context for the result list */
+ old_context = MemoryContextSwitchTo(cluster_context);
+
+ foreach(lc, inhoids)
+ {
+ Oid indexrelid = lfirst_oid(lc);
+ Oid relid = IndexGetRelation(indexrelid, false);
+ RelToCluster *rtc;
+
+ /* consider only leaf indexes */
+ if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
+ continue;
+
+ rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+ rtc->tableOid = relid;
+ rtc->indexOid = indexrelid;
+ rtcs = lappend(rtcs, rtc);
+ }
+
+ MemoryContextSwitchTo(old_context);
+ return rtcs;
}
---------
(0 rows)
+-- Verify that toast tables are clusterable
+CLUSTER pg_toast.pg_toast_826 USING pg_toast_826_index;
-- Verify that clustering all tables does in fact cluster the right ones
CREATE USER regress_clstr_user;
CREATE TABLE clstr_1 (a INT PRIMARY KEY);
CREATE TABLE clustertest (f1 int PRIMARY KEY);
CLUSTER clustertest USING clustertest_pkey;
CLUSTER clustertest;
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1) TO (10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1) TO (5);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (5) TO (10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (10) TO (20);
+CREATE TABLE clstrpart3 PARTITION OF clstrpart DEFAULT PARTITION BY RANGE (a);
+CREATE TABLE clstrpart33 PARTITION OF clstrpart3 DEFAULT;
+CREATE INDEX clstrpart_only_idx ON ONLY clstrpart (a);
+CLUSTER clstrpart USING clstrpart_only_idx; -- fails
+ERROR: cannot cluster on invalid index "clstrpart_only_idx"
+DROP INDEX clstrpart_only_idx;
CREATE INDEX clstrpart_idx ON clstrpart (a);
+-- Check that clustering sets new relfilenodes:
+CREATE TEMP TABLE old_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
+CLUSTER clstrpart USING clstrpart_idx;
+CREATE TEMP TABLE new_cluster_info AS SELECT relname, level, relfilenode, relkind FROM pg_partition_tree('clstrpart'::regclass) AS tree JOIN pg_class c ON c.oid=tree.relid ;
+SELECT relname, old.level, old.relkind, old.relfilenode = new.relfilenode FROM old_cluster_info AS old JOIN new_cluster_info AS new USING (relname) ORDER BY relname COLLATE "C";
+ relname | level | relkind | ?column?
+-------------+-------+---------+----------
+ clstrpart | 0 | p | t
+ clstrpart1 | 1 | p | t
+ clstrpart11 | 2 | r | f
+ clstrpart12 | 2 | p | t
+ clstrpart2 | 1 | r | f
+ clstrpart3 | 1 | p | t
+ clstrpart33 | 2 | r | f
+(7 rows)
+
+-- Partitioned indexes aren't and can't be marked un/clustered:
+\d clstrpart
+ Partitioned table "public.clstrpart"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+Partition key: RANGE (a)
+Indexes:
+ "clstrpart_idx" btree (a)
+Number of partitions: 3 (Use \d+ to list them.)
+
+CLUSTER clstrpart;
+ERROR: there is no previously clustered index for table "clstrpart"
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
+ERROR: cannot mark index clustered in partitioned table
ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
ERROR: cannot mark index clustered in partitioned table
-CLUSTER clstrpart USING clstrpart_idx;
-ERROR: cannot cluster a partitioned table
DROP TABLE clstrpart;
-- Test CLUSTER with external tuplesorting
create table clstr_4 as select * from tenk1;