| author | Peter Eisentraut | 2019-03-29 07:25:20 +0000 |
|---|---|---|
| committer | Peter Eisentraut | 2019-03-29 07:26:33 +0000 |
| commit | 5dc92b844e680c54a7ecd68de0ba53c949c3d605 | |
| tree | a6cd95f2b00c7568491e2feebbfb4b8c58c3b51b /src/backend | |
| parent | d25f519107bff602e1ebc81853fe592d020c118d | |
REINDEX CONCURRENTLY
This adds the CONCURRENTLY option to the REINDEX command. A REINDEX
CONCURRENTLY on a specific index creates a new index (like CREATE
INDEX CONCURRENTLY), then renames the old index out of the way and the
new index into its place and adjusts the dependencies, and then drops
the old index (like DROP INDEX CONCURRENTLY). The REINDEX command can
also run its other variants (TABLE, DATABASE) with the CONCURRENTLY
option (but not SYSTEM).
The reindexdb command gets the --concurrently option.
Author: Michael Paquier, Andreas Karlsson, Peter Eisentraut
Reviewed-by: Andres Freund, Fujii Masao, Jim Nasby, Sergei Kornilov
Discussion: https://www.postgresql.org/message-id/flat/60052986-956b-4478-45ed-8bd119e9b9cf%402ndquadrant.com#74948a1044c56c5e817a5050f554ddee
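As a quick illustration of the syntax this patch introduces (see the gram.y hunk below, which adds opt_concurrently to ReindexStmt), here is a minimal sketch; the relation and schema names are hypothetical:

```sql
-- Rebuild a single index while allowing concurrent reads and writes on its table
REINDEX INDEX CONCURRENTLY my_index;

-- Rebuild all valid indexes of a table, including those of its TOAST table
REINDEX TABLE CONCURRENTLY my_table;

-- The parenthesized option list combines with CONCURRENTLY
REINDEX (VERBOSE) TABLE CONCURRENTLY my_table;

-- SCHEMA and DATABASE accept CONCURRENTLY as well; SYSTEM does not
REINDEX SCHEMA CONCURRENTLY my_schema;
```

On the command line, the same behavior is reachable through reindexdb --concurrently.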
Diffstat (limited to 'src/backend')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/backend/catalog/dependency.c | 7 |
| -rw-r--r-- | src/backend/catalog/index.c | 504 |
| -rw-r--r-- | src/backend/catalog/pg_depend.c | 143 |
| -rw-r--r-- | src/backend/commands/indexcmds.c | 889 |
| -rw-r--r-- | src/backend/commands/tablecmds.c | 32 |
| -rw-r--r-- | src/backend/nodes/copyfuncs.c | 1 |
| -rw-r--r-- | src/backend/nodes/equalfuncs.c | 1 |
| -rw-r--r-- | src/backend/parser/gram.y | 22 |
| -rw-r--r-- | src/backend/tcop/utility.c | 10 |

9 files changed, 1450 insertions(+), 159 deletions(-)
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index f7acb4103eb..7af1670c0d2 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -306,6 +306,10 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel, * PERFORM_DELETION_SKIP_EXTENSIONS: do not delete extensions, even when * deleting objects that are part of an extension. This should generally * be used only when dropping temporary objects. + * + * PERFORM_DELETION_CONCURRENT_LOCK: perform the drop normally but with a lock + * as if it were concurrent. This is used by REINDEX CONCURRENTLY. + * */ void performDeletion(const ObjectAddress *object, @@ -1316,9 +1320,10 @@ doDeletion(const ObjectAddress *object, int flags) relKind == RELKIND_PARTITIONED_INDEX) { bool concurrent = ((flags & PERFORM_DELETION_CONCURRENTLY) != 0); + bool concurrent_lock_mode = ((flags & PERFORM_DELETION_CONCURRENT_LOCK) != 0); Assert(object->objectSubId == 0); - index_drop(object->objectId, concurrent); + index_drop(object->objectId, concurrent, concurrent_lock_mode); } else { diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 337361a6522..0d9d405c548 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -42,6 +42,7 @@ #include "catalog/pg_am.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_description.h" #include "catalog/pg_depend.h" #include "catalog/pg_inherits.h" #include "catalog/pg_operator.h" @@ -778,11 +779,11 @@ index_create(Relation heapRelation, errmsg("user-defined indexes on system catalog tables are not supported"))); /* - * concurrent index build on a system catalog is unsafe because we tend to - * release locks before committing in catalogs + * Concurrent index build on a system catalog is unsafe because we tend to + * release locks before committing in catalogs. */ if (concurrent && - IsSystemRelation(heapRelation)) + IsCatalogRelation(heapRelation)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("concurrent index creation on system catalog tables is not supported"))); @@ -1202,6 +1203,462 @@ index_create(Relation heapRelation, } /* + * index_concurrently_create_copy + * + * Create concurrently an index based on the definition of the one provided by + * caller. The index is inserted into catalogs and needs to be built later + * on. This is called during concurrent reindex processing. 
+ */ +Oid +index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId, const char *newName) +{ + Relation indexRelation; + IndexInfo *indexInfo; + Oid newIndexId = InvalidOid; + HeapTuple indexTuple, + classTuple; + Datum indclassDatum, + colOptionDatum, + optionDatum; + oidvector *indclass; + int2vector *indcoloptions; + bool isnull; + List *indexColNames = NIL; + + indexRelation = index_open(oldIndexId, RowExclusiveLock); + + /* New index uses the same index information as old index */ + indexInfo = BuildIndexInfo(indexRelation); + + /* Get the array of class and column options IDs from index info */ + indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldIndexId)); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", oldIndexId); + indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple, + Anum_pg_index_indclass, &isnull); + Assert(!isnull); + indclass = (oidvector *) DatumGetPointer(indclassDatum); + + colOptionDatum = SysCacheGetAttr(INDEXRELID, indexTuple, + Anum_pg_index_indoption, &isnull); + Assert(!isnull); + indcoloptions = (int2vector *) DatumGetPointer(colOptionDatum); + + /* Fetch options of index if any */ + classTuple = SearchSysCache1(RELOID, oldIndexId); + if (!HeapTupleIsValid(classTuple)) + elog(ERROR, "cache lookup failed for relation %u", oldIndexId); + optionDatum = SysCacheGetAttr(RELOID, classTuple, + Anum_pg_class_reloptions, &isnull); + + /* + * Extract the list of column names to be used for the index + * creation. + */ + for (int i = 0; i < indexInfo->ii_NumIndexAttrs; i++) + { + TupleDesc indexTupDesc = RelationGetDescr(indexRelation); + Form_pg_attribute att = TupleDescAttr(indexTupDesc, i); + + indexColNames = lappend(indexColNames, NameStr(att->attname)); + } + + /* Now create the new index */ + newIndexId = index_create(heapRelation, + newName, + InvalidOid, /* indexRelationId */ + InvalidOid, /* parentIndexRelid */ + InvalidOid, /* parentConstraintId */ + InvalidOid, /* relFileNode */ + indexInfo, + indexColNames, + indexRelation->rd_rel->relam, + indexRelation->rd_rel->reltablespace, + indexRelation->rd_indcollation, + indclass->values, + indcoloptions->values, + optionDatum, + INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT, + 0, + true, /* allow table to be a system catalog? */ + false, /* is_internal? */ + NULL); + + /* Close the relations used and clean up */ + index_close(indexRelation, NoLock); + ReleaseSysCache(indexTuple); + ReleaseSysCache(classTuple); + + return newIndexId; +} + +/* + * index_concurrently_build + * + * Build index for a concurrent operation. Low-level locks are taken when + * this operation is performed to prevent only schema changes, but they need + * to be kept until the end of the transaction performing this operation. + * 'indexOid' refers to an index relation OID already created as part of + * previous processing, and 'heapOid' refers to its parent heap relation. 
+ */ +void +index_concurrently_build(Oid heapRelationId, + Oid indexRelationId) +{ + Relation heapRel; + Relation indexRelation; + IndexInfo *indexInfo; + + /* This had better make sure that a snapshot is active */ + Assert(ActiveSnapshotSet()); + + /* Open and lock the parent heap relation */ + heapRel = table_open(heapRelationId, ShareUpdateExclusiveLock); + + /* And the target index relation */ + indexRelation = index_open(indexRelationId, RowExclusiveLock); + + /* + * We have to re-build the IndexInfo struct, since it was lost in the + * commit of the transaction where this concurrent index was created at + * the catalog level. + */ + indexInfo = BuildIndexInfo(indexRelation); + Assert(!indexInfo->ii_ReadyForInserts); + indexInfo->ii_Concurrent = true; + indexInfo->ii_BrokenHotChain = false; + + /* Now build the index */ + index_build(heapRel, indexRelation, indexInfo, false, true); + + /* Close both the relations, but keep the locks */ + table_close(heapRel, NoLock); + index_close(indexRelation, NoLock); + + /* + * Update the pg_index row to mark the index as ready for inserts. Once we + * commit this transaction, any new transactions that open the table must + * insert new entries into the index for insertions and non-HOT updates. + */ + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); +} + +/* + * index_concurrently_swap + * + * Swap name, dependencies, and constraints of the old index over to the new + * index, while marking the old index as invalid and the new as valid. + */ +void +index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) +{ + Relation pg_class, + pg_index, + pg_constraint, + pg_trigger; + Relation oldClassRel, + newClassRel; + HeapTuple oldClassTuple, + newClassTuple; + Form_pg_class oldClassForm, + newClassForm; + HeapTuple oldIndexTuple, + newIndexTuple; + Form_pg_index oldIndexForm, + newIndexForm; + Oid indexConstraintOid; + List *constraintOids = NIL; + ListCell *lc; + + /* + * Take a necessary lock on the old and new index before swapping them. 
+ */ + oldClassRel = relation_open(oldIndexId, ShareUpdateExclusiveLock); + newClassRel = relation_open(newIndexId, ShareUpdateExclusiveLock); + + /* Now swap names and dependencies of those indexes */ + pg_class = table_open(RelationRelationId, RowExclusiveLock); + + oldClassTuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(oldIndexId)); + if (!HeapTupleIsValid(oldClassTuple)) + elog(ERROR, "could not find tuple for relation %u", oldIndexId); + newClassTuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(newIndexId)); + if (!HeapTupleIsValid(newClassTuple)) + elog(ERROR, "could not find tuple for relation %u", newIndexId); + + oldClassForm = (Form_pg_class) GETSTRUCT(oldClassTuple); + newClassForm = (Form_pg_class) GETSTRUCT(newClassTuple); + + /* Swap the names */ + namestrcpy(&newClassForm->relname, NameStr(oldClassForm->relname)); + namestrcpy(&oldClassForm->relname, oldName); + + CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple); + CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple); + + heap_freetuple(oldClassTuple); + heap_freetuple(newClassTuple); + + /* Now swap index info */ + pg_index = table_open(IndexRelationId, RowExclusiveLock); + + oldIndexTuple = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(oldIndexId)); + if (!HeapTupleIsValid(oldIndexTuple)) + elog(ERROR, "could not find tuple for relation %u", oldIndexId); + newIndexTuple = SearchSysCacheCopy1(INDEXRELID, + ObjectIdGetDatum(newIndexId)); + if (!HeapTupleIsValid(newIndexTuple)) + elog(ERROR, "could not find tuple for relation %u", newIndexId); + + oldIndexForm = (Form_pg_index) GETSTRUCT(oldIndexTuple); + newIndexForm = (Form_pg_index) GETSTRUCT(newIndexTuple); + + /* + * Copy constraint flags from the old index. This is safe because the old + * index guaranteed uniqueness. 
+ */ + newIndexForm->indisprimary = oldIndexForm->indisprimary; + oldIndexForm->indisprimary = false; + newIndexForm->indisexclusion = oldIndexForm->indisexclusion; + oldIndexForm->indisexclusion = false; + newIndexForm->indimmediate = oldIndexForm->indimmediate; + oldIndexForm->indimmediate = true; + + /* Mark old index as valid and new as invalid as index_set_state_flags */ + newIndexForm->indisvalid = true; + oldIndexForm->indisvalid = false; + oldIndexForm->indisclustered = false; + + CatalogTupleUpdate(pg_index, &oldIndexTuple->t_self, oldIndexTuple); + CatalogTupleUpdate(pg_index, &newIndexTuple->t_self, newIndexTuple); + + heap_freetuple(oldIndexTuple); + heap_freetuple(newIndexTuple); + + /* + * Move constraints and triggers over to the new index + */ + + constraintOids = get_index_ref_constraints(oldIndexId); + + indexConstraintOid = get_index_constraint(oldIndexId); + + if (OidIsValid(indexConstraintOid)) + constraintOids = lappend_oid(constraintOids, indexConstraintOid); + + pg_constraint = table_open(ConstraintRelationId, RowExclusiveLock); + pg_trigger = table_open(TriggerRelationId, RowExclusiveLock); + + foreach(lc, constraintOids) + { + HeapTuple constraintTuple, + triggerTuple; + Form_pg_constraint conForm; + ScanKeyData key[1]; + SysScanDesc scan; + Oid constraintOid = lfirst_oid(lc); + + /* Move the constraint from the old to the new index */ + constraintTuple = SearchSysCacheCopy1(CONSTROID, + ObjectIdGetDatum(constraintOid)); + if (!HeapTupleIsValid(constraintTuple)) + elog(ERROR, "could not find tuple for constraint %u", constraintOid); + + conForm = ((Form_pg_constraint) GETSTRUCT(constraintTuple)); + + if (conForm->conindid == oldIndexId) + { + conForm->conindid = newIndexId; + + CatalogTupleUpdate(pg_constraint, &constraintTuple->t_self, constraintTuple); + } + + heap_freetuple(constraintTuple); + + /* Search for trigger records */ + ScanKeyInit(&key[0], + Anum_pg_trigger_tgconstraint, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(constraintOid)); + + scan = systable_beginscan(pg_trigger, TriggerConstraintIndexId, true, + NULL, 1, key); + + while (HeapTupleIsValid((triggerTuple = systable_getnext(scan)))) + { + Form_pg_trigger tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple); + + if (tgForm->tgconstrindid != oldIndexId) + continue; + + /* Make a modifiable copy */ + triggerTuple = heap_copytuple(triggerTuple); + tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple); + + tgForm->tgconstrindid = newIndexId; + + CatalogTupleUpdate(pg_trigger, &triggerTuple->t_self, triggerTuple); + + heap_freetuple(triggerTuple); + } + + systable_endscan(scan); + } + + /* + * Move comment if any + */ + { + Relation description; + ScanKeyData skey[3]; + SysScanDesc sd; + HeapTuple tuple; + Datum values[Natts_pg_description] = {0}; + bool nulls[Natts_pg_description] = {0}; + bool replaces[Natts_pg_description] = {0}; + + values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(newIndexId); + replaces[Anum_pg_description_objoid - 1] = true; + + ScanKeyInit(&skey[0], + Anum_pg_description_objoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(oldIndexId)); + ScanKeyInit(&skey[1], + Anum_pg_description_classoid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&skey[2], + Anum_pg_description_objsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(0)); + + description = table_open(DescriptionRelationId, RowExclusiveLock); + + sd = systable_beginscan(description, DescriptionObjIndexId, true, + NULL, 3, skey); + + while ((tuple = 
systable_getnext(sd)) != NULL) + { + tuple = heap_modify_tuple(tuple, RelationGetDescr(description), + values, nulls, replaces); + CatalogTupleUpdate(description, &tuple->t_self, tuple); + + break; /* Assume there can be only one match */ + } + + systable_endscan(sd); + table_close(description, NoLock); + } + + /* + * Move all dependencies on the old index to the new one + */ + + if (OidIsValid(indexConstraintOid)) + { + ObjectAddress myself, + referenced; + + /* Change to having the new index depend on the constraint */ + deleteDependencyRecordsForClass(RelationRelationId, oldIndexId, + ConstraintRelationId, DEPENDENCY_INTERNAL); + + myself.classId = RelationRelationId; + myself.objectId = newIndexId; + myself.objectSubId = 0; + + referenced.classId = ConstraintRelationId; + referenced.objectId = indexConstraintOid; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL); + } + + changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId); + + /* + * Copy over statistics from old to new index + */ + { + PgStat_StatTabEntry *tabentry; + + tabentry = pgstat_fetch_stat_tabentry(oldIndexId); + if (tabentry) + { + if (newClassRel->pgstat_info) + { + newClassRel->pgstat_info->t_counts.t_numscans = tabentry->numscans; + newClassRel->pgstat_info->t_counts.t_tuples_returned = tabentry->tuples_returned; + newClassRel->pgstat_info->t_counts.t_tuples_fetched = tabentry->tuples_fetched; + newClassRel->pgstat_info->t_counts.t_blocks_fetched = tabentry->blocks_fetched; + newClassRel->pgstat_info->t_counts.t_blocks_hit = tabentry->blocks_hit; + /* The data will be sent by the next pgstat_report_stat() call. */ + } + } + } + + /* Close relations */ + table_close(pg_class, RowExclusiveLock); + table_close(pg_index, RowExclusiveLock); + table_close(pg_constraint, RowExclusiveLock); + table_close(pg_trigger, RowExclusiveLock); + + /* The lock taken previously is not released until the end of transaction */ + relation_close(oldClassRel, NoLock); + relation_close(newClassRel, NoLock); +} + +/* + * index_concurrently_set_dead + * + * Perform the last invalidation stage of DROP INDEX CONCURRENTLY or REINDEX + * CONCURRENTLY before actually dropping the index. After calling this + * function, the index is seen by all the backends as dead. Low-level locks + * taken here are kept until the end of the transaction calling this function. + */ +void +index_concurrently_set_dead(Oid heapId, Oid indexId) +{ + Relation userHeapRelation; + Relation userIndexRelation; + + /* + * No more predicate locks will be acquired on this index, and we're + * about to stop doing inserts into the index which could show + * conflicts with existing predicate locks, so now is the time to move + * them to the heap relation. + */ + userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock); + userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock); + TransferPredicateLocksToHeapRelation(userIndexRelation); + + /* + * Now we are sure that nobody uses the index for queries; they just + * might have it open for updating it. So now we can unset indisready + * and indislive, then wait till nobody could be using it at all + * anymore. + */ + index_set_state_flags(indexId, INDEX_DROP_SET_DEAD); + + /* + * Invalidate the relcache for the table, so that after this commit + * all sessions will refresh the table's index list. Forgetting just + * the index's relcache entry is not enough. 
+ */ + CacheInvalidateRelcache(userHeapRelation); + + /* + * Close the relations again, though still holding session lock. + */ + table_close(userHeapRelation, NoLock); + index_close(userIndexRelation, NoLock); +} + +/* * index_constraint_create * * Set up a constraint associated with an index. Return the new constraint's @@ -1438,9 +1895,14 @@ index_constraint_create(Relation heapRelation, * * NOTE: this routine should now only be called through performDeletion(), * else associated dependencies won't be cleaned up. + * + * If concurrent is true, do a DROP INDEX CONCURRENTLY. If concurrent is + * false but concurrent_lock_mode is true, then do a normal DROP INDEX but + * take a lock for CONCURRENTLY processing. That is used as part of REINDEX + * CONCURRENTLY. */ void -index_drop(Oid indexId, bool concurrent) +index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) { Oid heapId; Relation userHeapRelation; @@ -1472,7 +1934,7 @@ index_drop(Oid indexId, bool concurrent) * using it.) */ heapId = IndexGetRelation(indexId, false); - lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock; + lockmode = (concurrent || concurrent_lock_mode) ? ShareUpdateExclusiveLock : AccessExclusiveLock; userHeapRelation = table_open(heapId, lockmode); userIndexRelation = index_open(indexId, lockmode); @@ -1587,36 +2049,8 @@ index_drop(Oid indexId, bool concurrent) */ WaitForLockers(heaplocktag, AccessExclusiveLock); - /* - * No more predicate locks will be acquired on this index, and we're - * about to stop doing inserts into the index which could show - * conflicts with existing predicate locks, so now is the time to move - * them to the heap relation. - */ - userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock); - userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock); - TransferPredicateLocksToHeapRelation(userIndexRelation); - - /* - * Now we are sure that nobody uses the index for queries; they just - * might have it open for updating it. So now we can unset indisready - * and indislive, then wait till nobody could be using it at all - * anymore. - */ - index_set_state_flags(indexId, INDEX_DROP_SET_DEAD); - - /* - * Invalidate the relcache for the table, so that after this commit - * all sessions will refresh the table's index list. Forgetting just - * the index's relcache entry is not enough. - */ - CacheInvalidateRelcache(userHeapRelation); - - /* - * Close the relations again, though still holding session lock. - */ - table_close(userHeapRelation, NoLock); - index_close(userIndexRelation, NoLock); + /* Finish invalidation of index and mark it as dead */ + index_concurrently_set_dead(heapId, indexId); /* * Again, commit the transaction to make the pg_index update visible diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c index 23b01f841e4..d63bf5e56d9 100644 --- a/src/backend/catalog/pg_depend.c +++ b/src/backend/catalog/pg_depend.c @@ -396,6 +396,94 @@ changeDependencyFor(Oid classId, Oid objectId, } /* + * Adjust all dependency records to point to a different object of the same type + * + * refClassId/oldRefObjectId specify the old referenced object. + * newRefObjectId is the new referenced object (must be of class refClassId). + * + * Returns the number of records updated. 
+ */ +long +changeDependenciesOn(Oid refClassId, Oid oldRefObjectId, + Oid newRefObjectId) +{ + long count = 0; + Relation depRel; + ScanKeyData key[2]; + SysScanDesc scan; + HeapTuple tup; + ObjectAddress objAddr; + bool newIsPinned; + + depRel = table_open(DependRelationId, RowExclusiveLock); + + /* + * If oldRefObjectId is pinned, there won't be any dependency entries on + * it --- we can't cope in that case. (This isn't really worth expending + * code to fix, in current usage; it just means you can't rename stuff out + * of pg_catalog, which would likely be a bad move anyway.) + */ + objAddr.classId = refClassId; + objAddr.objectId = oldRefObjectId; + objAddr.objectSubId = 0; + + if (isObjectPinned(&objAddr, depRel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot remove dependency on %s because it is a system object", + getObjectDescription(&objAddr)))); + + /* + * We can handle adding a dependency on something pinned, though, since + * that just means deleting the dependency entry. + */ + objAddr.objectId = newRefObjectId; + + newIsPinned = isObjectPinned(&objAddr, depRel); + + /* Now search for dependency records */ + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(refClassId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(oldRefObjectId)); + + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 2, key); + + while (HeapTupleIsValid((tup = systable_getnext(scan)))) + { + Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup); + + if (newIsPinned) + CatalogTupleDelete(depRel, &tup->t_self); + else + { + /* make a modifiable copy */ + tup = heap_copytuple(tup); + depform = (Form_pg_depend) GETSTRUCT(tup); + + depform->refobjid = newRefObjectId; + + CatalogTupleUpdate(depRel, &tup->t_self, tup); + + heap_freetuple(tup); + } + + count++; + } + + systable_endscan(scan); + + table_close(depRel, RowExclusiveLock); + + return count; +} + +/* * isObjectPinned() * * Test if an object is required for basic database functionality. @@ -754,3 +842,58 @@ get_index_constraint(Oid indexId) return constraintId; } + +/* + * get_index_ref_constraints + * Given the OID of an index, return the OID of all foreign key + * constraints which reference the index. + */ +List * +get_index_ref_constraints(Oid indexId) +{ + List *result = NIL; + Relation depRel; + ScanKeyData key[3]; + SysScanDesc scan; + HeapTuple tup; + + /* Search the dependency table for the index */ + depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(indexId)); + ScanKeyInit(&key[2], + Anum_pg_depend_refobjsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(0)); + + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 3, key); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); + + /* + * We assume any normal dependency from a constraint must be what we + * are looking for. 
+ */ + if (deprec->classid == ConstraintRelationId && + deprec->objsubid == 0 && + deprec->deptype == DEPENDENCY_NORMAL) + { + result = lappend_oid(result, deprec->objid); + } + } + + systable_endscan(scan); + table_close(depRel, AccessShareLock); + + return result; +} diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index d6eb48cb4e6..c68d8d58167 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -58,6 +58,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/partcache.h" +#include "utils/pg_rusage.h" #include "utils/regproc.h" #include "utils/snapmgr.h" #include "utils/syscache.h" @@ -83,6 +84,7 @@ static char *ChooseIndexNameAddition(List *colnames); static List *ChooseIndexColumnNames(List *indexElems); static void RangeVarCallbackForReindexIndex(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg); +static bool ReindexRelationConcurrently(Oid relationOid, int options); static void ReindexPartitionedIndex(Relation parentIdx); /* @@ -297,6 +299,90 @@ CheckIndexCompatible(Oid oldId, return ret; } + +/* + * WaitForOlderSnapshots + * + * Wait for transactions that might have an older snapshot than the given xmin + * limit, because it might not contain tuples deleted just before it has + * been taken. Obtain a list of VXIDs of such transactions, and wait for them + * individually. This is used when building an index concurrently. + * + * We can exclude any running transactions that have xmin > the xmin given; + * their oldest snapshot must be newer than our xmin limit. + * We can also exclude any transactions that have xmin = zero, since they + * evidently have no live snapshot at all (and any one they might be in + * process of taking is certainly newer than ours). Transactions in other + * DBs can be ignored too, since they'll never even be able to see the + * index being worked on. + * + * We can also exclude autovacuum processes and processes running manual + * lazy VACUUMs, because they won't be fazed by missing index entries + * either. (Manual ANALYZEs, however, can't be excluded because they + * might be within transactions that are going to do arbitrary operations + * later.) + * + * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not + * check for that. + * + * If a process goes idle-in-transaction with xmin zero, we do not need to + * wait for it anymore, per the above argument. We do not have the + * infrastructure right now to stop waiting if that happens, but we can at + * least avoid the folly of waiting when it is idle at the time we would + * begin to wait. We do this by repeatedly rechecking the output of + * GetCurrentVirtualXIDs. If, during any iteration, a particular vxid + * doesn't show up in the output, we know we can forget about it. + */ +static void +WaitForOlderSnapshots(TransactionId limitXmin) +{ + int n_old_snapshots; + int i; + VirtualTransactionId *old_snapshots; + + old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false, + PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, + &n_old_snapshots); + + for (i = 0; i < n_old_snapshots; i++) + { + if (!VirtualTransactionIdIsValid(old_snapshots[i])) + continue; /* found uninteresting in previous cycle */ + + if (i > 0) + { + /* see if anything's changed ... 
*/ + VirtualTransactionId *newer_snapshots; + int n_newer_snapshots; + int j; + int k; + + newer_snapshots = GetCurrentVirtualXIDs(limitXmin, + true, false, + PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, + &n_newer_snapshots); + for (j = i; j < n_old_snapshots; j++) + { + if (!VirtualTransactionIdIsValid(old_snapshots[j])) + continue; /* found uninteresting in previous cycle */ + for (k = 0; k < n_newer_snapshots; k++) + { + if (VirtualTransactionIdEquals(old_snapshots[j], + newer_snapshots[k])) + break; + } + if (k >= n_newer_snapshots) /* not there anymore */ + SetInvalidVirtualTransactionId(old_snapshots[j]); + } + pfree(newer_snapshots); + } + + if (VirtualTransactionIdIsValid(old_snapshots[i])) + VirtualXactLock(old_snapshots[i], true); + } +} + + /* * DefineIndex * Creates a new index. @@ -345,7 +431,6 @@ DefineIndex(Oid relationId, List *indexColNames; List *allIndexParams; Relation rel; - Relation indexRelation; HeapTuple tuple; Form_pg_am accessMethodForm; IndexAmRoutine *amRoutine; @@ -360,9 +445,7 @@ DefineIndex(Oid relationId, int numberOfAttributes; int numberOfKeyAttributes; TransactionId limitXmin; - VirtualTransactionId *old_snapshots; ObjectAddress address; - int n_old_snapshots; LockRelId heaprelid; LOCKTAG heaplocktag; LOCKMODE lockmode; @@ -1151,34 +1234,11 @@ DefineIndex(Oid relationId, * HOT-chain or the extension of the chain is HOT-safe for this index. */ - /* Open and lock the parent heap relation */ - rel = table_open(relationId, ShareUpdateExclusiveLock); - - /* And the target index relation */ - indexRelation = index_open(indexRelationId, RowExclusiveLock); - /* Set ActiveSnapshot since functions in the indexes may need it */ PushActiveSnapshot(GetTransactionSnapshot()); - /* We have to re-build the IndexInfo struct, since it was lost in commit */ - indexInfo = BuildIndexInfo(indexRelation); - Assert(!indexInfo->ii_ReadyForInserts); - indexInfo->ii_Concurrent = true; - indexInfo->ii_BrokenHotChain = false; - - /* Now build the index */ - index_build(rel, indexRelation, indexInfo, false, true); - - /* Close both the relations, but keep the locks */ - table_close(rel, NoLock); - index_close(indexRelation, NoLock); - - /* - * Update the pg_index row to mark the index as ready for inserts. Once we - * commit this transaction, any new transactions that open the table must - * insert new entries into the index for insertions and non-HOT updates. - */ - index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); + /* Perform concurrent build of index */ + index_concurrently_build(relationId, indexRelationId); /* we can do away with our snapshot */ PopActiveSnapshot(); @@ -1250,74 +1310,9 @@ DefineIndex(Oid relationId, * The index is now valid in the sense that it contains all currently * interesting tuples. But since it might not contain tuples deleted just * before the reference snap was taken, we have to wait out any - * transactions that might have older snapshots. Obtain a list of VXIDs - * of such transactions, and wait for them individually. - * - * We can exclude any running transactions that have xmin > the xmin of - * our reference snapshot; their oldest snapshot must be newer than ours. - * We can also exclude any transactions that have xmin = zero, since they - * evidently have no live snapshot at all (and any one they might be in - * process of taking is certainly newer than ours). Transactions in other - * DBs can be ignored too, since they'll never even be able to see this - * index. 
- * - * We can also exclude autovacuum processes and processes running manual - * lazy VACUUMs, because they won't be fazed by missing index entries - * either. (Manual ANALYZEs, however, can't be excluded because they - * might be within transactions that are going to do arbitrary operations - * later.) - * - * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not - * check for that. - * - * If a process goes idle-in-transaction with xmin zero, we do not need to - * wait for it anymore, per the above argument. We do not have the - * infrastructure right now to stop waiting if that happens, but we can at - * least avoid the folly of waiting when it is idle at the time we would - * begin to wait. We do this by repeatedly rechecking the output of - * GetCurrentVirtualXIDs. If, during any iteration, a particular vxid - * doesn't show up in the output, we know we can forget about it. + * transactions that might have older snapshots. */ - old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false, - PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, - &n_old_snapshots); - - for (i = 0; i < n_old_snapshots; i++) - { - if (!VirtualTransactionIdIsValid(old_snapshots[i])) - continue; /* found uninteresting in previous cycle */ - - if (i > 0) - { - /* see if anything's changed ... */ - VirtualTransactionId *newer_snapshots; - int n_newer_snapshots; - int j; - int k; - - newer_snapshots = GetCurrentVirtualXIDs(limitXmin, - true, false, - PROC_IS_AUTOVACUUM | PROC_IN_VACUUM, - &n_newer_snapshots); - for (j = i; j < n_old_snapshots; j++) - { - if (!VirtualTransactionIdIsValid(old_snapshots[j])) - continue; /* found uninteresting in previous cycle */ - for (k = 0; k < n_newer_snapshots; k++) - { - if (VirtualTransactionIdEquals(old_snapshots[j], - newer_snapshots[k])) - break; - } - if (k >= n_newer_snapshots) /* not there anymore */ - SetInvalidVirtualTransactionId(old_snapshots[j]); - } - pfree(newer_snapshots); - } - - if (VirtualTransactionIdIsValid(old_snapshots[i])) - VirtualXactLock(old_snapshots[i], true); - } + WaitForOlderSnapshots(limitXmin); /* * Index can now be marked valid -- update its pg_index entry @@ -2204,7 +2199,7 @@ ChooseIndexColumnNames(List *indexElems) * Recreate a specific index. */ void -ReindexIndex(RangeVar *indexRelation, int options) +ReindexIndex(RangeVar *indexRelation, int options, bool concurrent) { Oid indOid; Oid heapOid = InvalidOid; @@ -2216,7 +2211,8 @@ ReindexIndex(RangeVar *indexRelation, int options) * obtain lock on table first, to avoid deadlock hazard. The lock level * used here must match the index lock obtained in reindex_index(). */ - indOid = RangeVarGetRelidExtended(indexRelation, AccessExclusiveLock, + indOid = RangeVarGetRelidExtended(indexRelation, + concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock, 0, RangeVarCallbackForReindexIndex, (void *) &heapOid); @@ -2236,7 +2232,10 @@ ReindexIndex(RangeVar *indexRelation, int options) persistence = irel->rd_rel->relpersistence; index_close(irel, NoLock); - reindex_index(indOid, false, persistence, options); + if (concurrent) + ReindexRelationConcurrently(indOid, options); + else + reindex_index(indOid, false, persistence, options); } /* @@ -2304,18 +2303,26 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation, * Recreate all indexes of a table (and of its toast table, if any) */ Oid -ReindexTable(RangeVar *relation, int options) +ReindexTable(RangeVar *relation, int options, bool concurrent) { Oid heapOid; + bool result; /* The lock level used here should match reindex_relation(). 
*/ - heapOid = RangeVarGetRelidExtended(relation, ShareLock, 0, + heapOid = RangeVarGetRelidExtended(relation, + concurrent ? ShareUpdateExclusiveLock : ShareLock, + 0, RangeVarCallbackOwnsTable, NULL); - if (!reindex_relation(heapOid, - REINDEX_REL_PROCESS_TOAST | - REINDEX_REL_CHECK_CONSTRAINTS, - options)) + if (concurrent) + result = ReindexRelationConcurrently(heapOid, options); + else + result = reindex_relation(heapOid, + REINDEX_REL_PROCESS_TOAST | + REINDEX_REL_CHECK_CONSTRAINTS, + options); + + if (!result) ereport(NOTICE, (errmsg("table \"%s\" has no indexes", relation->relname))); @@ -2333,7 +2340,7 @@ ReindexTable(RangeVar *relation, int options) */ void ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, - int options) + int options, bool concurrent) { Oid objectOid; Relation relationRelation; @@ -2345,12 +2352,18 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, List *relids = NIL; ListCell *l; int num_keys; + bool concurrent_warning = false; AssertArg(objectName); Assert(objectKind == REINDEX_OBJECT_SCHEMA || objectKind == REINDEX_OBJECT_SYSTEM || objectKind == REINDEX_OBJECT_DATABASE); + if (objectKind == REINDEX_OBJECT_SYSTEM && concurrent) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent reindex of system catalogs is not supported"))); + /* * Get OID of object to reindex, being the database currently being used * by session for a database or for system catalogs, or the schema defined @@ -2453,6 +2466,25 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, !pg_class_ownercheck(relid, GetUserId())) continue; + /* + * Skip system tables that index_create() would reject to index + * concurrently. XXX We need the additional check for + * FirstNormalObjectId to skip information_schema tables, because + * IsCatalogClass() here does not cover information_schema, but the + * check in index_create() will error on the TOAST tables of + * information_schema tables. 
+ */ + if (concurrent && + (IsCatalogClass(relid, classtuple) || relid < FirstNormalObjectId)) + { + if (!concurrent_warning) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent reindex is not supported for catalog relations, skipping all"))); + concurrent_warning = true; + continue; + } + /* Save the list of relation OIDs in private context */ old = MemoryContextSwitchTo(private_context); @@ -2479,26 +2511,663 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, foreach(l, relids) { Oid relid = lfirst_oid(l); + bool result; StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); - if (reindex_relation(relid, - REINDEX_REL_PROCESS_TOAST | - REINDEX_REL_CHECK_CONSTRAINTS, - options)) - if (options & REINDEXOPT_VERBOSE) + if (concurrent) + { + result = ReindexRelationConcurrently(relid, options); + /* ReindexRelationConcurrently() does the verbose output */ + } + else + { + result = reindex_relation(relid, + REINDEX_REL_PROCESS_TOAST | + REINDEX_REL_CHECK_CONSTRAINTS, + options); + + if (result && (options & REINDEXOPT_VERBOSE)) ereport(INFO, (errmsg("table \"%s.%s\" was reindexed", get_namespace_name(get_rel_namespace(relid)), get_rel_name(relid)))); + + PopActiveSnapshot(); + } + + CommitTransactionCommand(); + } + StartTransactionCommand(); + + MemoryContextDelete(private_context); +} + + +/* + * ReindexRelationConcurrently - process REINDEX CONCURRENTLY for given + * relation OID + * + * The relation can be either an index or a table. If it is a table, all its + * valid indexes will be rebuilt, including its associated toast table + * indexes. If it is an index, this index itself will be rebuilt. + * + * The locks taken on parent tables and involved indexes are kept until the + * transaction is committed, at which point a session lock is taken on each + * relation. Both of these protect against concurrent schema changes. + */ +static bool +ReindexRelationConcurrently(Oid relationOid, int options) +{ + List *heapRelationIds = NIL; + List *indexIds = NIL; + List *newIndexIds = NIL; + List *relationLocks = NIL; + List *lockTags = NIL; + ListCell *lc, + *lc2; + MemoryContext private_context; + MemoryContext oldcontext; + char relkind; + char *relationName = NULL; + char *relationNamespace = NULL; + PGRUsage ru0; + + /* + * Create a memory context that will survive forced transaction commits we + * do below. Since it is a child of PortalContext, it will go away + * eventually even if we suffer an error; there's no need for special + * abort cleanup logic. + */ + private_context = AllocSetContextCreate(PortalContext, + "ReindexConcurrent", + ALLOCSET_SMALL_SIZES); + + if (options & REINDEXOPT_VERBOSE) + { + /* Save data needed by REINDEX VERBOSE in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + relationName = get_rel_name(relationOid); + relationNamespace = get_namespace_name(get_rel_namespace(relationOid)); + + pg_rusage_init(&ru0); + + MemoryContextSwitchTo(oldcontext); + } + + relkind = get_rel_relkind(relationOid); + + /* + * Extract the list of indexes that are going to be rebuilt based on the + * list of relation Oids given by caller. + */ + switch (relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + case RELKIND_TOASTVALUE: + { + /* + * In the case of a relation, find all its indexes including + * toast indexes. 
+ */ + Relation heapRelation; + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Track this relation for session locks */ + heapRelationIds = lappend_oid(heapRelationIds, relationOid); + + MemoryContextSwitchTo(oldcontext); + + /* Open relation to get its indexes */ + heapRelation = table_open(relationOid, ShareUpdateExclusiveLock); + + /* Add all the valid indexes of relation to list */ + foreach(lc, RelationGetIndexList(heapRelation)) + { + Oid cellOid = lfirst_oid(lc); + Relation indexRelation = index_open(cellOid, + ShareUpdateExclusiveLock); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex concurrently invalid index \"%s.%s\", skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else if (indexRelation->rd_index->indisexclusion) + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot reindex concurrently exclusion constraint index \"%s.%s\", skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else + { + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + indexIds = lappend_oid(indexIds, cellOid); + + MemoryContextSwitchTo(oldcontext); + } + + index_close(indexRelation, NoLock); + } + + /* Also add the toast indexes */ + if (OidIsValid(heapRelation->rd_rel->reltoastrelid)) + { + Oid toastOid = heapRelation->rd_rel->reltoastrelid; + Relation toastRelation = table_open(toastOid, + ShareUpdateExclusiveLock); + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Track this relation for session locks */ + heapRelationIds = lappend_oid(heapRelationIds, toastOid); + + MemoryContextSwitchTo(oldcontext); + + foreach(lc2, RelationGetIndexList(toastRelation)) + { + Oid cellOid = lfirst_oid(lc2); + Relation indexRelation = index_open(cellOid, + ShareUpdateExclusiveLock); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("cannot reindex concurrently invalid index \"%s.%s\", skipping", + get_namespace_name(get_rel_namespace(cellOid)), + get_rel_name(cellOid)))); + else + { + /* + * Save the list of relation OIDs in private + * context + */ + oldcontext = MemoryContextSwitchTo(private_context); + + indexIds = lappend_oid(indexIds, cellOid); + + MemoryContextSwitchTo(oldcontext); + } + + index_close(indexRelation, NoLock); + } + + table_close(toastRelation, NoLock); + } + + table_close(heapRelation, NoLock); + break; + } + case RELKIND_INDEX: + { + /* + * For an index simply add its Oid to list. Invalid indexes + * cannot be included in list. 
+ */ + Relation indexRelation = index_open(relationOid, ShareUpdateExclusiveLock); + Oid heapId = IndexGetRelation(relationOid, false); + + /* A shared relation cannot be reindexed concurrently */ + if (IsSharedRelation(heapId)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent reindex is not supported for shared relations"))); + + /* A system catalog cannot be reindexed concurrently */ + if (IsSystemNamespace(get_rel_namespace(heapId))) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("concurrent reindex is not supported for catalog relations"))); + + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Track the heap relation of this index for session locks */ + heapRelationIds = list_make1_oid(heapId); + + MemoryContextSwitchTo(oldcontext); + + if (!indexRelation->rd_index->indisvalid) + ereport(WARNING, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("cannot reindex concurrently invalid index \"%s.%s\", skipping", + get_namespace_name(get_rel_namespace(relationOid)), + get_rel_name(relationOid)))); + else + { + /* Save the list of relation OIDs in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + indexIds = lappend_oid(indexIds, relationOid); + + MemoryContextSwitchTo(oldcontext); + } + + index_close(indexRelation, NoLock); + break; + } + case RELKIND_PARTITIONED_TABLE: + /* see reindex_relation() */ + ereport(WARNING, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"", + get_rel_name(relationOid)))); + return false; + default: + /* Return error if type of relation is not supported */ + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot reindex concurrently this type of relation"))); + break; + } + + /* Definitely no indexes, so leave */ + if (indexIds == NIL) + { + PopActiveSnapshot(); + return false; + } + + Assert(heapRelationIds != NIL); + + /*----- + * Now we have all the indexes we want to process in indexIds. + * + * The phases now are: + * + * 1. create new indexes in the catalog + * 2. build new indexes + * 3. let new indexes catch up with tuples inserted in the meantime + * 4. swap index names + * 5. mark old indexes as dead + * 6. drop old indexes + * + * We process each phase for all indexes before moving to the next phase, + * for efficiency. + */ + + /* + * Phase 1 of REINDEX CONCURRENTLY + * + * Create a new index with the same properties as the old one, but it is + * only registered in catalogs and will be built later. Then get session + * locks on all involved tables. See analogous code in DefineIndex() for + * more detailed comments. 
+ */ + + foreach(lc, indexIds) + { + char *concurrentName; + Oid indexId = lfirst_oid(lc); + Oid newIndexId; + Relation indexRel; + Relation heapRel; + Relation newIndexRel; + LockRelId lockrelid; + + indexRel = index_open(indexId, ShareUpdateExclusiveLock); + heapRel = table_open(indexRel->rd_index->indrelid, + ShareUpdateExclusiveLock); + + /* Choose a temporary relation name for the new index */ + concurrentName = ChooseRelationName(get_rel_name(indexId), + NULL, + "ccnew", + get_rel_namespace(indexRel->rd_index->indrelid), + false); + + /* Create new index definition based on given index */ + newIndexId = index_concurrently_create_copy(heapRel, + indexId, + concurrentName); + + /* Now open the relation of the new index, a lock is also needed on it */ + newIndexRel = index_open(indexId, ShareUpdateExclusiveLock); + + /* + * Save the list of OIDs and locks in private context + */ + oldcontext = MemoryContextSwitchTo(private_context); + + newIndexIds = lappend_oid(newIndexIds, newIndexId); + + /* + * Save lockrelid to protect each relation from drop then close + * relations. The lockrelid on parent relation is not taken here to + * avoid multiple locks taken on the same relation, instead we rely on + * parentRelationIds built earlier. + */ + lockrelid = indexRel->rd_lockInfo.lockRelId; + relationLocks = lappend(relationLocks, &lockrelid); + lockrelid = newIndexRel->rd_lockInfo.lockRelId; + relationLocks = lappend(relationLocks, &lockrelid); + + MemoryContextSwitchTo(oldcontext); + + index_close(indexRel, NoLock); + index_close(newIndexRel, NoLock); + table_close(heapRel, NoLock); + } + + /* + * Save the heap lock for following visibility checks with other backends + * might conflict with this session. + */ + foreach(lc, heapRelationIds) + { + Relation heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock); + LockRelId lockrelid = heapRelation->rd_lockInfo.lockRelId; + LOCKTAG *heaplocktag; + + /* Save the list of locks in private context */ + oldcontext = MemoryContextSwitchTo(private_context); + + /* Add lockrelid of heap relation to the list of locked relations */ + relationLocks = lappend(relationLocks, &lockrelid); + + heaplocktag = (LOCKTAG *) palloc(sizeof(LOCKTAG)); + + /* Save the LOCKTAG for this parent relation for the wait phase */ + SET_LOCKTAG_RELATION(*heaplocktag, lockrelid.dbId, lockrelid.relId); + lockTags = lappend(lockTags, heaplocktag); + + MemoryContextSwitchTo(oldcontext); + + /* Close heap relation */ + table_close(heapRelation, NoLock); + } + + /* Get a session-level lock on each table. */ + foreach(lc, relationLocks) + { + LockRelId lockRel = *((LockRelId *) lfirst(lc)); + + LockRelationIdForSession(&lockRel, ShareUpdateExclusiveLock); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * Phase 2 of REINDEX CONCURRENTLY + * + * Build the new indexes in a separate transaction for each index to avoid + * having open transactions for an unnecessary long time. But before + * doing that, wait until no running transactions could have the table of + * the index open with the old list of indexes. See "phase 2" in + * DefineIndex() for more details. 
+ */ + + WaitForLockersMultiple(lockTags, ShareLock); + CommitTransactionCommand(); + + forboth(lc, indexIds, lc2, newIndexIds) + { + Relation indexRel; + Oid oldIndexId = lfirst_oid(lc); + Oid newIndexId = lfirst_oid(lc2); + Oid heapId; + + CHECK_FOR_INTERRUPTS(); + + /* Start new transaction for this index's concurrent build */ + StartTransactionCommand(); + + /* Set ActiveSnapshot since functions in the indexes may need it */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* + * Index relation has been closed by previous commit, so reopen it to + * get its information. + */ + indexRel = index_open(oldIndexId, ShareUpdateExclusiveLock); + heapId = indexRel->rd_index->indrelid; + index_close(indexRel, NoLock); + + /* Perform concurrent build of new index */ + index_concurrently_build(heapId, newIndexId); + + PopActiveSnapshot(); + CommitTransactionCommand(); + } + StartTransactionCommand(); + + /* + * Phase 3 of REINDEX CONCURRENTLY + * + * During this phase the old indexes catch up with any new tuples that + * were created during the previous phase. See "phase 3" in DefineIndex() + * for more details. + */ + + WaitForLockersMultiple(lockTags, ShareLock); + CommitTransactionCommand(); + + foreach(lc, newIndexIds) + { + Oid newIndexId = lfirst_oid(lc); + Oid heapId; + TransactionId limitXmin; + Snapshot snapshot; + + CHECK_FOR_INTERRUPTS(); + + StartTransactionCommand(); + + heapId = IndexGetRelation(newIndexId, false); + + /* + * Take the "reference snapshot" that will be used by validate_index() + * to filter candidate tuples. + */ + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + PushActiveSnapshot(snapshot); + + validate_index(heapId, newIndexId, snapshot); + + /* + * We can now do away with our active snapshot, we still need to save + * the xmin limit to wait for older snapshots. + */ + limitXmin = snapshot->xmin; + PopActiveSnapshot(); + UnregisterSnapshot(snapshot); + + /* + * To ensure no deadlocks, we must commit and start yet another + * transaction, and do our wait before any snapshot has been taken in + * it. + */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * The index is now valid in the sense that it contains all currently + * interesting tuples. But since it might not contain tuples deleted just + * before the reference snap was taken, we have to wait out any + * transactions that might have older snapshots. + */ + WaitForOlderSnapshots(limitXmin); + CommitTransactionCommand(); } + + /* + * Phase 4 of REINDEX CONCURRENTLY + * + * Now that the new indexes have been validated, swap each new index with + * its corresponding old index. + * + * We mark the new indexes as valid and the old indexes as not valid at + * the same time to make sure we only get constraint violations from the + * indexes with the correct names. + */ + StartTransactionCommand(); + forboth(lc, indexIds, lc2, newIndexIds) + { + char *oldName; + Oid oldIndexId = lfirst_oid(lc); + Oid newIndexId = lfirst_oid(lc2); + Oid heapId; + + CHECK_FOR_INTERRUPTS(); + + heapId = IndexGetRelation(oldIndexId, false); + + /* Choose a relation name for old index */ + oldName = ChooseRelationName(get_rel_name(oldIndexId), + NULL, + "ccold", + get_rel_namespace(heapId), + false); + + /* + * Swap old index with the new one. This also marks the new one as + * valid and the old one as not valid. 
+ */ + index_concurrently_swap(newIndexId, oldIndexId, oldName); + + /* + * Invalidate the relcache for the table, so that after this commit + * all sessions will refresh any cached plans that might reference the + * index. + */ + CacheInvalidateRelcacheByRelid(heapId); + + /* + * CCI here so that subsequent iterations see the oldName in the + * catalog and can choose a nonconflicting name for their oldName. + * Otherwise, this could lead to conflicts if a table has two indexes + * whose names are equal for the first NAMEDATALEN-minus-a-few + * characters. + */ + CommandCounterIncrement(); + } + + /* Commit this transaction and make index swaps visible */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * Phase 5 of REINDEX CONCURRENTLY + * + * Mark the old indexes as dead. First we must wait until no running + * transaction could be using the index for a query. See also + * index_drop() for more details. + */ + + WaitForLockersMultiple(lockTags, AccessExclusiveLock); + + foreach(lc, indexIds) + { + Oid oldIndexId = lfirst_oid(lc); + Oid heapId; + + CHECK_FOR_INTERRUPTS(); + heapId = IndexGetRelation(oldIndexId, false); + index_concurrently_set_dead(heapId, oldIndexId); + } + + /* Commit this transaction to make the updates visible. */ + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * Phase 6 of REINDEX CONCURRENTLY + * + * Drop the old indexes. + */ + + WaitForLockersMultiple(lockTags, AccessExclusiveLock); + + PushActiveSnapshot(GetTransactionSnapshot()); + + { + ObjectAddresses *objects = new_object_addresses(); + + foreach(lc, indexIds) + { + Oid oldIndexId = lfirst_oid(lc); + ObjectAddress *object = palloc(sizeof(ObjectAddress)); + + object->classId = RelationRelationId; + object->objectId = oldIndexId; + object->objectSubId = 0; + + add_exact_object_address(object, objects); + } + + /* + * Use PERFORM_DELETION_CONCURRENT_LOCK so that index_drop() uses the + * right lock level. + */ + performMultipleDeletions(objects, DROP_RESTRICT, + PERFORM_DELETION_CONCURRENT_LOCK | PERFORM_DELETION_INTERNAL); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + + /* + * Finally, release the session-level lock on the table. + */ + foreach(lc, relationLocks) + { + LockRelId lockRel = *((LockRelId *) lfirst(lc)); + + UnlockRelationIdForSession(&lockRel, ShareUpdateExclusiveLock); + } + + /* Start a new transaction to finish process properly */ + StartTransactionCommand(); + + /* Log what we did */ + if (options & REINDEXOPT_VERBOSE) + { + if (relkind == RELKIND_INDEX) + ereport(INFO, + (errmsg("index \"%s.%s\" was reindexed", + relationNamespace, relationName), + errdetail("%s.", + pg_rusage_show(&ru0)))); + else + { + foreach(lc, newIndexIds) + { + Oid indOid = lfirst_oid(lc); + + ereport(INFO, + (errmsg("index \"%s.%s\" was reindexed", + get_namespace_name(get_rel_namespace(indOid)), + get_rel_name(indOid)))); + /* Don't show rusage here, since it's not per index. 
*/ + } + + ereport(INFO, + (errmsg("table \"%s.%s\" was reindexed", + relationNamespace, relationName), + errdetail("%s.", + pg_rusage_show(&ru0)))); + } + } + MemoryContextDelete(private_context); + + return true; } /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 06e7caa9cff..16492a23c74 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -1299,6 +1299,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, bool is_partition; Form_pg_class classform; LOCKMODE heap_lockmode; + bool invalid_system_index = false; state = (struct DropRelationCallbackState *) arg; relkind = state->relkind; @@ -1361,7 +1362,36 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relOid)), rel->relname); - if (!allowSystemTableMods && IsSystemClass(relOid, classform)) + /* + * Check the case of a system index that might have been invalidated by a + * failed concurrent process and allow its drop. For the time being, this + * only concerns indexes of toast relations that became invalid during a + * REINDEX CONCURRENTLY process. + */ + if (IsSystemClass(relOid, classform) && relkind == RELKIND_INDEX) + { + HeapTuple locTuple; + Form_pg_index indexform; + bool indisvalid; + + locTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(relOid)); + if (!HeapTupleIsValid(locTuple)) + { + ReleaseSysCache(tuple); + return; + } + + indexform = (Form_pg_index) GETSTRUCT(locTuple); + indisvalid = indexform->indisvalid; + ReleaseSysCache(locTuple); + + /* Mark object as being an invalid index of system catalogs */ + if (!indisvalid) + invalid_system_index = true; + } + + /* In the case of an invalid index, it is fine to bypass this check */ + if (!invalid_system_index && !allowSystemTableMods && IsSystemClass(relOid, classform)) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied: \"%s\" is a system catalog", diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 04cc15606d2..84f9112addd 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -4367,6 +4367,7 @@ _copyReindexStmt(const ReindexStmt *from) COPY_NODE_FIELD(relation); COPY_STRING_FIELD(name); COPY_SCALAR_FIELD(options); + COPY_SCALAR_FIELD(concurrent); return newnode; } diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 91c007ad5b0..7eb9f1dd928 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2103,6 +2103,7 @@ _equalReindexStmt(const ReindexStmt *a, const ReindexStmt *b) COMPARE_NODE_FIELD(relation); COMPARE_STRING_FIELD(name); COMPARE_SCALAR_FIELD(options); + COMPARE_SCALAR_FIELD(concurrent); return true; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0a4822829a5..d711f9a7368 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -8300,42 +8300,46 @@ DropTransformStmt: DROP TRANSFORM opt_if_exists FOR Typename LANGUAGE name opt_d * * QUERY: * - * REINDEX [ (options) ] type <name> + * REINDEX [ (options) ] type [CONCURRENTLY] <name> *****************************************************************************/ ReindexStmt: - REINDEX reindex_target_type qualified_name + REINDEX reindex_target_type opt_concurrently qualified_name { ReindexStmt *n = makeNode(ReindexStmt); n->kind = $2; - n->relation = $3; + n->concurrent = $3; + n->relation = $4; n->name = NULL; n->options 
= 0; $$ = (Node *)n; } - | REINDEX reindex_target_multitable name + | REINDEX reindex_target_multitable opt_concurrently name { ReindexStmt *n = makeNode(ReindexStmt); n->kind = $2; - n->name = $3; + n->concurrent = $3; + n->name = $4; n->relation = NULL; n->options = 0; $$ = (Node *)n; } - | REINDEX '(' reindex_option_list ')' reindex_target_type qualified_name + | REINDEX '(' reindex_option_list ')' reindex_target_type opt_concurrently qualified_name { ReindexStmt *n = makeNode(ReindexStmt); n->kind = $5; - n->relation = $6; + n->concurrent = $6; + n->relation = $7; n->name = NULL; n->options = $3; $$ = (Node *)n; } - | REINDEX '(' reindex_option_list ')' reindex_target_multitable name + | REINDEX '(' reindex_option_list ')' reindex_target_multitable opt_concurrently name { ReindexStmt *n = makeNode(ReindexStmt); n->kind = $5; - n->name = $6; + n->concurrent = $6; + n->name = $7; n->relation = NULL; n->options = $3; $$ = (Node *)n; diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 857b7a8b43f..edf24c438c9 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -774,16 +774,20 @@ standard_ProcessUtility(PlannedStmt *pstmt, { ReindexStmt *stmt = (ReindexStmt *) parsetree; + if (stmt->concurrent) + PreventInTransactionBlock(isTopLevel, + "REINDEX CONCURRENTLY"); + /* we choose to allow this during "read only" transactions */ PreventCommandDuringRecovery("REINDEX"); /* forbidden in parallel mode due to CommandIsReadOnly */ switch (stmt->kind) { case REINDEX_OBJECT_INDEX: - ReindexIndex(stmt->relation, stmt->options); + ReindexIndex(stmt->relation, stmt->options, stmt->concurrent); break; case REINDEX_OBJECT_TABLE: - ReindexTable(stmt->relation, stmt->options); + ReindexTable(stmt->relation, stmt->options, stmt->concurrent); break; case REINDEX_OBJECT_SCHEMA: case REINDEX_OBJECT_SYSTEM: @@ -799,7 +803,7 @@ standard_ProcessUtility(PlannedStmt *pstmt, (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" : (stmt->kind == REINDEX_OBJECT_SYSTEM) ? "REINDEX SYSTEM" : "REINDEX DATABASE"); - ReindexMultipleTables(stmt->name, stmt->kind, stmt->options); + ReindexMultipleTables(stmt->name, stmt->kind, stmt->options, stmt->concurrent); break; default: elog(ERROR, "unrecognized object type: %d", |
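To round this off, a hedged sketch of the user-visible restrictions implemented by the hunks above (PreventInTransactionBlock() in utility.c, the catalog and shared-relation checks in indexcmds.c) and of the transient "ccnew"/"ccold" index names used during the swap; object names are hypothetical and the comments only paraphrase the error paths shown above:

```sql
-- CONCURRENTLY cannot run inside a transaction block
BEGIN;
REINDEX TABLE CONCURRENTLY my_table;            -- fails via PreventInTransactionBlock()
ROLLBACK;

-- System catalogs are excluded from concurrent processing
REINDEX INDEX CONCURRENTLY pg_class_oid_index;  -- "concurrent reindex is not supported for catalog relations"
REINDEX SYSTEM CONCURRENTLY my_database;        -- "concurrent reindex of system catalogs is not supported"

-- If a concurrent rebuild is interrupted, an invalid transient index with a
-- "_ccnew" (or "_ccold") suffix may be left behind; dropping it and retrying
-- the REINDEX should be sufficient.
DROP INDEX IF EXISTS my_index_ccnew;
```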
