PostgreSQL Source Code git master
genam.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * genam.c
4 * general index access method routines
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/index/genam.c
12 *
13 * NOTES
14 * many of the old access method routines have been turned into
15 * macros and moved to genam.h -cim 4/30/91
16 *
17 *-------------------------------------------------------------------------
18 */
19
#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/relscan.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "catalog/index.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/injection_point.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
39
40
41/* ----------------------------------------------------------------
42 * general access method routines
43 *
44 * All indexed access methods use an identical scan structure.
45 * We don't know how the various AMs do locking, however, so we don't
46 * do anything about that here.
47 *
48 * The intent is that an AM implementor will define a beginscan routine
49 * that calls RelationGetIndexScan, to fill in the scan, and then does
50 * whatever kind of locking he wants.
51 *
52 * At the end of a scan, the AM's endscan routine undoes the locking,
53 * but does *not* call IndexScanEnd --- the higher-level index_endscan
54 * routine does that. (We can't do it in the AM because index_endscan
55 * still needs to touch the IndexScanDesc after calling the AM.)
56 *
57 * Because of this, the AM does not have a choice whether to call
58 * RelationGetIndexScan or not; its beginscan routine must return an
59 * object made by RelationGetIndexScan. This is kinda ugly but not
60 * worth cleaning up now.
61 * ----------------------------------------------------------------
62 */
63
64/* ----------------
65 * RelationGetIndexScan -- Create and fill an IndexScanDesc.
66 *
67 * This routine creates an index scan structure and sets up initial
68 * contents for it.
69 *
70 * Parameters:
71 * indexRelation -- index relation for scan.
72 * nkeys -- count of scan keys (index qual conditions).
73 * norderbys -- count of index order-by operators.
74 *
75 * Returns:
76 * An initialized IndexScanDesc.
77 * ----------------
78 */
80RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
81{
82 IndexScanDesc scan;
83
84 scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
85
86 scan->heapRelation = NULL; /* may be set later */
87 scan->xs_heapfetch = NULL;
88 scan->indexRelation = indexRelation;
89 scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
90 scan->numberOfKeys = nkeys;
91 scan->numberOfOrderBys = norderbys;
92
93 /*
94 * We allocate key workspace here, but it won't get filled until amrescan.
95 */
96 if (nkeys > 0)
97 scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
98 else
99 scan->keyData = NULL;
100 if (norderbys > 0)
101 scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
102 else
103 scan->orderByData = NULL;
104
105 scan->xs_want_itup = false; /* may be set later */
106
107 /*
108 * During recovery we ignore killed tuples and don't bother to kill them
109 * either. We do this because the xmin on the primary node could easily be
110 * later than the xmin on the standby node, so that what the primary
111 * thinks is killed is supposed to be visible on standby. So for correct
112 * MVCC for queries during recovery we must ignore these hints and check
113 * all tuples. Do *not* set ignore_killed_tuples to true when running in a
114 * transaction that was started during recovery. xactStartedInRecovery
115 * should not be altered by index AMs.
116 */
117 scan->kill_prior_tuple = false;
120
121 scan->opaque = NULL;
122 scan->instrument = NULL;
123
124 scan->xs_itup = NULL;
125 scan->xs_itupdesc = NULL;
126 scan->xs_hitup = NULL;
127 scan->xs_hitupdesc = NULL;
128
129 return scan;
130}
131
132/* ----------------
133 * IndexScanEnd -- End an index scan.
134 *
135 * This routine just releases the storage acquired by
136 * RelationGetIndexScan(). Any AM-level resources are
137 * assumed to already have been released by the AM's
138 * endscan routine.
139 *
140 * Returns:
141 * None.
142 * ----------------
143 */
144void
146{
147 if (scan->keyData != NULL)
148 pfree(scan->keyData);
149 if (scan->orderByData != NULL)
150 pfree(scan->orderByData);
151
152 pfree(scan);
153}
154
155/*
156 * BuildIndexValueDescription
157 *
158 * Construct a string describing the contents of an index entry, in the
159 * form "(key_name, ...)=(key_value, ...)". This is currently used
160 * for building unique-constraint, exclusion-constraint error messages, and
161 * logical replication conflict error messages so only key columns of the index
162 * are checked and printed.
163 *
164 * Note that if the user does not have permissions to view all of the
165 * columns involved then a NULL is returned. Returning a partial key seems
166 * unlikely to be useful and we have no way to know which of the columns the
167 * user provided (unlike in ExecBuildSlotValueDescription).
168 *
169 * The passed-in values/nulls arrays are the "raw" input to the index AM,
170 * e.g. results of FormIndexDatum --- this is not necessarily what is stored
171 * in the index, but it's what the user perceives to be stored.
172 *
173 * Note: if you change anything here, check whether
174 * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar
175 * change.
176 */
177char *
179 const Datum *values, const bool *isnull)
180{
182 Form_pg_index idxrec;
183 int indnkeyatts;
184 int i;
185 int keyno;
186 Oid indexrelid = RelationGetRelid(indexRelation);
187 Oid indrelid;
188 AclResult aclresult;
189
190 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
191
192 /*
193 * Check permissions- if the user does not have access to view all of the
194 * key columns then return NULL to avoid leaking data.
195 *
196 * First check if RLS is enabled for the relation. If so, return NULL to
197 * avoid leaking data.
198 *
199 * Next we need to check table-level SELECT access and then, if there is
200 * no access there, check column-level permissions.
201 */
202 idxrec = indexRelation->rd_index;
203 indrelid = idxrec->indrelid;
204 Assert(indexrelid == idxrec->indexrelid);
205
206 /* RLS check- if RLS is enabled then we don't return anything. */
207 if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED)
208 return NULL;
209
210 /* Table-level SELECT is enough, if the user has it */
211 aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT);
212 if (aclresult != ACLCHECK_OK)
213 {
214 /*
215 * No table-level access, so step through the columns in the index and
216 * make sure the user has SELECT rights on all of them.
217 */
218 for (keyno = 0; keyno < indnkeyatts; keyno++)
219 {
220 AttrNumber attnum = idxrec->indkey.values[keyno];
221
222 /*
223 * Note that if attnum == InvalidAttrNumber, then this is an index
224 * based on an expression and we return no detail rather than try
225 * to figure out what column(s) the expression includes and if the
226 * user has SELECT rights on them.
227 */
228 if (attnum == InvalidAttrNumber ||
231 {
232 /* No access, so clean up and return */
233 return NULL;
234 }
235 }
236 }
237
239 appendStringInfo(&buf, "(%s)=(",
240 pg_get_indexdef_columns(indexrelid, true));
241
242 for (i = 0; i < indnkeyatts; i++)
243 {
244 char *val;
245
246 if (isnull[i])
247 val = "null";
248 else
249 {
250 Oid foutoid;
251 bool typisvarlena;
252
253 /*
254 * The provided data is not necessarily of the type stored in the
255 * index; rather it is of the index opclass's input type. So look
256 * at rd_opcintype not the index tupdesc.
257 *
258 * Note: this is a bit shaky for opclasses that have pseudotype
259 * input types such as ANYARRAY or RECORD. Currently, the
260 * typoutput functions associated with the pseudotypes will work
261 * okay, but we might have to try harder in future.
262 */
263 getTypeOutputInfo(indexRelation->rd_opcintype[i],
264 &foutoid, &typisvarlena);
265 val = OidOutputFunctionCall(foutoid, values[i]);
266 }
267
268 if (i > 0)
271 }
272
274
275 return buf.data;
276}
277
278/*
279 * Get the snapshotConflictHorizon from the table entries pointed to by the
280 * index tuples being deleted using an AM-generic approach.
281 *
282 * This is a table_index_delete_tuples() shim used by index AMs that only need
283 * to consult the tableam to get a snapshotConflictHorizon value, and only
284 * expect to delete index tuples that are already known deletable (typically
285 * due to having LP_DEAD bits set). When a snapshotConflictHorizon value
286 * isn't needed in index AM's deletion WAL record, it is safe for it to skip
287 * calling here entirely.
288 *
289 * We assume that caller index AM uses the standard IndexTuple representation,
290 * with table TIDs stored in the t_tid field. We also expect (and assert)
291 * that the line pointers on page for 'itemnos' offsets are already marked
292 * LP_DEAD.
293 */
296 Relation hrel,
297 Buffer ibuf,
298 OffsetNumber *itemnos,
299 int nitems)
300{
301 TM_IndexDeleteOp delstate;
302 TransactionId snapshotConflictHorizon = InvalidTransactionId;
303 Page ipage = BufferGetPage(ibuf);
304 IndexTuple itup;
305
306 Assert(nitems > 0);
307
308 delstate.irel = irel;
309 delstate.iblknum = BufferGetBlockNumber(ibuf);
310 delstate.bottomup = false;
311 delstate.bottomupfreespace = 0;
312 delstate.ndeltids = 0;
313 delstate.deltids = palloc(nitems * sizeof(TM_IndexDelete));
314 delstate.status = palloc(nitems * sizeof(TM_IndexStatus));
315
316 /* identify what the index tuples about to be deleted point to */
317 for (int i = 0; i < nitems; i++)
318 {
319 OffsetNumber offnum = itemnos[i];
320 ItemId iitemid;
321
322 iitemid = PageGetItemId(ipage, offnum);
323 itup = (IndexTuple) PageGetItem(ipage, iitemid);
324
325 Assert(ItemIdIsDead(iitemid));
326
327 ItemPointerCopy(&itup->t_tid, &delstate.deltids[i].tid);
328 delstate.deltids[i].id = delstate.ndeltids;
329 delstate.status[i].idxoffnum = offnum;
330 delstate.status[i].knowndeletable = true; /* LP_DEAD-marked */
331 delstate.status[i].promising = false; /* unused */
332 delstate.status[i].freespace = 0; /* unused */
333
334 delstate.ndeltids++;
335 }
336
337 /* determine the actual xid horizon */
338 snapshotConflictHorizon = table_index_delete_tuples(hrel, &delstate);
339
340 /* assert tableam agrees that all items are deletable */
341 Assert(delstate.ndeltids == nitems);
342
343 pfree(delstate.deltids);
344 pfree(delstate.status);
345
346 return snapshotConflictHorizon;
347}
348
349
350/* ----------------------------------------------------------------
351 * heap-or-index-scan access to system catalogs
352 *
353 * These functions support system catalog accesses that normally use
354 * an index but need to be capable of being switched to heap scans
355 * if the system indexes are unavailable.
356 *
357 * The specified scan keys must be compatible with the named index.
358 * Generally this means that they must constrain either all columns
359 * of the index, or the first K columns of an N-column index.
360 *
361 * These routines could work with non-system tables, actually,
362 * but they're only useful when there is a known index to use with
363 * the given scan keys; so in practice they're only good for
364 * predetermined types of scans of system catalogs.
365 * ----------------------------------------------------------------
366 */
367
368/*
369 * systable_beginscan --- set up for heap-or-index scan
370 *
371 * rel: catalog to scan, already opened and suitably locked
372 * indexId: OID of index to conditionally use
373 * indexOK: if false, forces a heap scan (see notes below)
374 * snapshot: time qual to use (NULL for a recent catalog snapshot)
375 * nkeys, key: scan keys
376 *
377 * The attribute numbers in the scan key should be set for the heap case.
378 * If we choose to index, we convert them to 1..n to reference the index
379 * columns. Note this means there must be one scankey qualification per
380 * index column! This is checked by the Asserts in the normal, index-using
381 * case, but won't be checked if the heapscan path is taken.
382 *
383 * The routine checks the normal cases for whether an indexscan is safe,
384 * but caller can make additional checks and pass indexOK=false if needed.
385 * In standard case indexOK can simply be constant TRUE.
386 */
389 Oid indexId,
390 bool indexOK,
391 Snapshot snapshot,
392 int nkeys, ScanKey key)
393{
394 SysScanDesc sysscan;
395 Relation irel;
396
397 if (indexOK &&
399 !ReindexIsProcessingIndex(indexId))
400 irel = index_open(indexId, AccessShareLock);
401 else
402 irel = NULL;
403
404 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
405
406 sysscan->heap_rel = heapRelation;
407 sysscan->irel = irel;
408 sysscan->slot = table_slot_create(heapRelation, NULL);
409
410 if (snapshot == NULL)
411 {
412 Oid relid = RelationGetRelid(heapRelation);
413
414 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
415 sysscan->snapshot = snapshot;
416 }
417 else
418 {
419 /* Caller is responsible for any snapshot. */
420 sysscan->snapshot = NULL;
421 }
422
423 if (irel)
424 {
425 int i;
426 ScanKey idxkey;
427
428 idxkey = palloc_array(ScanKeyData, nkeys);
429
430 /* Convert attribute numbers to be index column numbers. */
431 for (i = 0; i < nkeys; i++)
432 {
433 int j;
434
435 memcpy(&idxkey[i], &key[i], sizeof(ScanKeyData));
436
437 for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++)
438 {
439 if (key[i].sk_attno == irel->rd_index->indkey.values[j])
440 {
441 idxkey[i].sk_attno = j + 1;
442 break;
443 }
444 }
446 elog(ERROR, "column is not in index");
447 }
448
449 sysscan->iscan = index_beginscan(heapRelation, irel,
450 snapshot, NULL, nkeys, 0);
451 index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
452 sysscan->scan = NULL;
453
454 pfree(idxkey);
455 }
456 else
457 {
458 /*
459 * We disallow synchronized scans when forced to use a heapscan on a
460 * catalog. In most cases the desired rows are near the front, so
461 * that the unpredictable start point of a syncscan is a serious
462 * disadvantage; and there are no compensating advantages, because
463 * it's unlikely that such scans will occur in parallel.
464 */
465 sysscan->scan = table_beginscan_strat(heapRelation, snapshot,
466 nkeys, key,
467 true, false);
468 sysscan->iscan = NULL;
469 }
470
471 /*
472 * If CheckXidAlive is set then set a flag to indicate that system table
473 * scan is in-progress. See detailed comments in xact.c where these
474 * variables are declared.
475 */
477 bsysscan = true;
478
479 return sysscan;
480}
481
482/*
483 * HandleConcurrentAbort - Handle concurrent abort of the CheckXidAlive.
484 *
485 * Error out, if CheckXidAlive is aborted. We can't directly use
486 * TransactionIdDidAbort as after crash such transaction might not have been
487 * marked as aborted. See detailed comments in xact.c where the variable
488 * is declared.
489 */
490static inline void
492{
497 (errcode(ERRCODE_TRANSACTION_ROLLBACK),
498 errmsg("transaction aborted during system catalog scan")));
499}
500
501/*
502 * systable_getnext --- get next tuple in a heap-or-index scan
503 *
504 * Returns NULL if no more tuples available.
505 *
506 * Note that returned tuple is a reference to data in a disk buffer;
507 * it must not be modified, and should be presumed inaccessible after
508 * next getnext() or endscan() call.
509 *
510 * XXX: It'd probably make sense to offer a slot based interface, at least
511 * optionally.
512 */
515{
516 HeapTuple htup = NULL;
517
518 if (sysscan->irel)
519 {
520 if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
521 {
522 bool shouldFree;
523
524 htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
525 Assert(!shouldFree);
526
527 /*
528 * We currently don't need to support lossy index operators for
529 * any system catalog scan. It could be done here, using the scan
530 * keys to drive the operator calls, if we arranged to save the
531 * heap attnums during systable_beginscan(); this is practical
532 * because we still wouldn't need to support indexes on
533 * expressions.
534 */
535 if (sysscan->iscan->xs_recheck)
536 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
537 }
538 }
539 else
540 {
541 if (table_scan_getnextslot(sysscan->scan, ForwardScanDirection, sysscan->slot))
542 {
543 bool shouldFree;
544
545 htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
546 Assert(!shouldFree);
547 }
548 }
549
550 /*
551 * Handle the concurrent abort while fetching the catalog tuple during
552 * logical streaming of a transaction.
553 */
555
556 return htup;
557}
558
559/*
560 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
561 *
562 * In particular, determine if this tuple would be visible to a catalog scan
563 * that started now. We don't handle the case of a non-MVCC scan snapshot,
564 * because no caller needs that yet.
565 *
566 * This is useful to test whether an object was deleted while we waited to
567 * acquire lock on it.
568 *
569 * Note: we don't actually *need* the tuple to be passed in, but it's a
570 * good crosscheck that the caller is interested in the right tuple.
571 */
572bool
574{
575 Snapshot freshsnap;
576 bool result;
577
578 Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL));
579
580 freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
581 freshsnap = RegisterSnapshot(freshsnap);
582
584 sysscan->slot,
585 freshsnap);
586 UnregisterSnapshot(freshsnap);
587
588 /*
589 * Handle the concurrent abort while fetching the catalog tuple during
590 * logical streaming of a transaction.
591 */
593
594 return result;
595}
596
597/*
598 * systable_endscan --- close scan, release resources
599 *
600 * Note that it's still up to the caller to close the heap relation.
601 */
602void
604{
605 if (sysscan->slot)
606 {
608 sysscan->slot = NULL;
609 }
610
611 if (sysscan->irel)
612 {
613 index_endscan(sysscan->iscan);
615 }
616 else
617 table_endscan(sysscan->scan);
618
619 if (sysscan->snapshot)
621
622 /*
623 * Reset the bsysscan flag at the end of the systable scan. See detailed
624 * comments in xact.c where these variables are declared.
625 */
627 bsysscan = false;
628
629 pfree(sysscan);
630}
631
632
633/*
634 * systable_beginscan_ordered --- set up for ordered catalog scan
635 *
636 * These routines have essentially the same API as systable_beginscan etc,
637 * except that they guarantee to return multiple matching tuples in
638 * index order. Also, for largely historical reasons, the index to use
639 * is opened and locked by the caller, not here.
640 *
641 * Currently we do not support non-index-based scans here. (In principle
642 * we could do a heapscan and sort, but the uses are in places that
643 * probably don't need to still work with corrupted catalog indexes.)
644 * For the moment, therefore, these functions are merely the thinest of
645 * wrappers around index_beginscan/index_getnext_slot. The main reason for
646 * their existence is to centralize possible future support of lossy operators
647 * in catalog scans.
648 */
651 Relation indexRelation,
652 Snapshot snapshot,
653 int nkeys, ScanKey key)
654{
655 SysScanDesc sysscan;
656 int i;
657 ScanKey idxkey;
658
659 /* REINDEX can probably be a hard error here ... */
660 if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
662 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
663 errmsg("cannot access index \"%s\" while it is being reindexed",
664 RelationGetRelationName(indexRelation))));
665 /* ... but we only throw a warning about violating IgnoreSystemIndexes */
667 elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
668 RelationGetRelationName(indexRelation));
669
670 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
671
672 sysscan->heap_rel = heapRelation;
673 sysscan->irel = indexRelation;
674 sysscan->slot = table_slot_create(heapRelation, NULL);
675
676 if (snapshot == NULL)
677 {
678 Oid relid = RelationGetRelid(heapRelation);
679
680 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
681 sysscan->snapshot = snapshot;
682 }
683 else
684 {
685 /* Caller is responsible for any snapshot. */
686 sysscan->snapshot = NULL;
687 }
688
689 idxkey = palloc_array(ScanKeyData, nkeys);
690
691 /* Convert attribute numbers to be index column numbers. */
692 for (i = 0; i < nkeys; i++)
693 {
694 int j;
695
696 memcpy(&idxkey[i], &key[i], sizeof(ScanKeyData));
697
698 for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++)
699 {
700 if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
701 {
702 idxkey[i].sk_attno = j + 1;
703 break;
704 }
705 }
706 if (j == IndexRelationGetNumberOfAttributes(indexRelation))
707 elog(ERROR, "column is not in index");
708 }
709
710 sysscan->iscan = index_beginscan(heapRelation, indexRelation,
711 snapshot, NULL, nkeys, 0);
712 index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
713 sysscan->scan = NULL;
714
715 pfree(idxkey);
716
717 /*
718 * If CheckXidAlive is set then set a flag to indicate that system table
719 * scan is in-progress. See detailed comments in xact.c where these
720 * variables are declared.
721 */
723 bsysscan = true;
724
725 return sysscan;
726}
727
728/*
729 * systable_getnext_ordered --- get next tuple in an ordered catalog scan
730 */
733{
734 HeapTuple htup = NULL;
735
736 Assert(sysscan->irel);
737 if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot))
738 htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);
739
740 /* See notes in systable_getnext */
741 if (htup && sysscan->iscan->xs_recheck)
742 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
743
744 /*
745 * Handle the concurrent abort while fetching the catalog tuple during
746 * logical streaming of a transaction.
747 */
749
750 return htup;
751}
752
753/*
754 * systable_endscan_ordered --- close scan, release resources
755 */
756void
758{
759 if (sysscan->slot)
760 {
762 sysscan->slot = NULL;
763 }
764
765 Assert(sysscan->irel);
766 index_endscan(sysscan->iscan);
767 if (sysscan->snapshot)
769
770 /*
771 * Reset the bsysscan flag at the end of the systable scan. See detailed
772 * comments in xact.c where these variables are declared.
773 */
775 bsysscan = false;
776
777 pfree(sysscan);
778}
779
780/*
781 * systable_inplace_update_begin --- update a row "in place" (overwrite it)
782 *
783 * Overwriting violates both MVCC and transactional safety, so the uses of
784 * this function in Postgres are extremely limited. Nonetheless we find some
785 * places to use it. See README.tuplock section "Locking to write
786 * inplace-updated tables" and later sections for expectations of readers and
787 * writers of a table that gets inplace updates. Standard flow:
788 *
789 * ... [any slow preparation not requiring oldtup] ...
790 * systable_inplace_update_begin([...], &tup, &inplace_state);
791 * if (!HeapTupleIsValid(tup))
792 * elog(ERROR, [...]);
793 * ... [buffer is exclusive-locked; mutate "tup"] ...
794 * if (dirty)
795 * systable_inplace_update_finish(inplace_state, tup);
796 * else
797 * systable_inplace_update_cancel(inplace_state);
798 *
799 * The first several params duplicate the systable_beginscan() param list.
800 * "oldtupcopy" is an output parameter, assigned NULL if the key ceases to
801 * find a live tuple. (In PROC_IN_VACUUM, that is a low-probability transient
802 * condition.) If "oldtupcopy" gets non-NULL, you must pass output parameter
803 * "state" to systable_inplace_update_finish() or
804 * systable_inplace_update_cancel().
805 */
806void
808 Oid indexId,
809 bool indexOK,
810 Snapshot snapshot,
811 int nkeys, const ScanKeyData *key,
812 HeapTuple *oldtupcopy,
813 void **state)
814{
815 int retries = 0;
816 SysScanDesc scan;
817 HeapTuple oldtup;
819
820 /*
821 * For now, we don't allow parallel updates. Unlike a regular update,
822 * this should never create a combo CID, so it might be possible to relax
823 * this restriction, but not without more thought and testing. It's not
824 * clear that it would be useful, anyway.
825 */
826 if (IsInParallelMode())
828 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
829 errmsg("cannot update tuples during a parallel operation")));
830
831 /*
832 * Accept a snapshot argument, for symmetry, but this function advances
833 * its snapshot as needed to reach the tail of the updated tuple chain.
834 */
835 Assert(snapshot == NULL);
836
837 Assert(IsInplaceUpdateRelation(relation) || !IsSystemRelation(relation));
838
839 /* Loop for an exclusive-locked buffer of a non-updated tuple. */
840 do
841 {
842 TupleTableSlot *slot;
843
845
846 /*
847 * Processes issuing heap_update (e.g. GRANT) at maximum speed could
848 * drive us to this error. A hostile table owner has stronger ways to
849 * damage their own table, so that's minor.
850 */
851 if (retries++ > 10000)
852 elog(ERROR, "giving up after too many tries to overwrite row");
853
854 INJECTION_POINT("inplace-before-pin");
855 scan = systable_beginscan(relation, indexId, indexOK, snapshot,
856 nkeys, unconstify(ScanKeyData *, key));
857 oldtup = systable_getnext(scan);
858 if (!HeapTupleIsValid(oldtup))
859 {
860 systable_endscan(scan);
861 *oldtupcopy = NULL;
862 return;
863 }
864
865 slot = scan->slot;
867 bslot = (BufferHeapTupleTableSlot *) slot;
868 } while (!heap_inplace_lock(scan->heap_rel,
869 bslot->base.tuple, bslot->buffer,
870 (void (*) (void *)) systable_endscan, scan));
871
872 *oldtupcopy = heap_copytuple(oldtup);
873 *state = scan;
874}
875
876/*
877 * systable_inplace_update_finish --- second phase of inplace update
878 *
879 * The tuple cannot change size, and therefore its header fields and null
880 * bitmap (if any) don't change either.
881 */
882void
884{
886 Relation relation = scan->heap_rel;
887 TupleTableSlot *slot = scan->slot;
889 HeapTuple oldtup = bslot->base.tuple;
890 Buffer buffer = bslot->buffer;
891
892 heap_inplace_update_and_unlock(relation, oldtup, tuple, buffer);
893 systable_endscan(scan);
894}
895
896/*
897 * systable_inplace_update_cancel --- abandon inplace update
898 *
899 * This is an alternative to making a no-op update.
900 */
901void
903{
905 Relation relation = scan->heap_rel;
906 TupleTableSlot *slot = scan->slot;
908 HeapTuple oldtup = bslot->base.tuple;
909 Buffer buffer = bslot->buffer;
910
911 heap_inplace_unlock(relation, oldtup, buffer);
912 systable_endscan(scan);
913}
AclResult
Definition: acl.h:182
@ ACLCHECK_OK
Definition: acl.h:183
AclResult pg_attribute_aclcheck(Oid table_oid, AttrNumber attnum, Oid roleid, AclMode mode)
Definition: aclchk.c:3853
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
Definition: aclchk.c:4024
int16 AttrNumber
Definition: attnum.h:21
#define InvalidAttrNumber
Definition: attnum.h:23
static Datum values[MAXATTR]
Definition: bootstrap.c:151
int Buffer
Definition: buf.h:23
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4231
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:417
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244
PageData * Page
Definition: bufpage.h:82
#define unconstify(underlying_type, expr)
Definition: c.h:1216
uint32 TransactionId
Definition: c.h:623
bool IsSystemRelation(Relation relation)
Definition: catalog.c:74
bool IsInplaceUpdateRelation(Relation relation)
Definition: catalog.c:183
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define WARNING
Definition: elog.h:36
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1443
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1833
#define palloc_array(type, count)
Definition: fe_memutils.h:76
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:1763
char * BuildIndexValueDescription(Relation indexRelation, const Datum *values, const bool *isnull)
Definition: genam.c:178
void systable_endscan(SysScanDesc sysscan)
Definition: genam.c:603
void systable_inplace_update_cancel(void *state)
Definition: genam.c:902
bool systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
Definition: genam.c:573
void IndexScanEnd(IndexScanDesc scan)
Definition: genam.c:145
void systable_inplace_update_begin(Relation relation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, const ScanKeyData *key, HeapTuple *oldtupcopy, void **state)
Definition: genam.c:807
TransactionId index_compute_xid_horizon_for_tuples(Relation irel, Relation hrel, Buffer ibuf, OffsetNumber *itemnos, int nitems)
Definition: genam.c:295
static void HandleConcurrentAbort()
Definition: genam.c:491
SysScanDesc systable_beginscan_ordered(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:650
void systable_inplace_update_finish(void *state, HeapTuple tuple)
Definition: genam.c:883
HeapTuple systable_getnext(SysScanDesc sysscan)
Definition: genam.c:514
void systable_endscan_ordered(SysScanDesc sysscan)
Definition: genam.c:757
HeapTuple systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
Definition: genam.c:732
SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key)
Definition: genam.c:388
IndexScanDesc RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
Definition: genam.c:80
struct SysScanDescData * SysScanDesc
Definition: genam.h:114
struct IndexScanDescData * IndexScanDesc
Definition: genam.h:113
Assert(PointerIsAligned(start, uint64))
bool heap_inplace_lock(Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
Definition: heapam.c:6318
void heap_inplace_unlock(Relation relation, HeapTuple oldtup, Buffer buffer)
Definition: heapam.c:6607
void heap_inplace_update_and_unlock(Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
Definition: heapam.c:6454
HeapTuple heap_copytuple(HeapTuple tuple)
Definition: heaptuple.c:778
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define nitems(x)
Definition: indent.h:31
bool ReindexIsProcessingIndex(Oid indexOid)
Definition: index.c:4140
bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
Definition: indexam.c:720
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, IndexScanInstrumentation *instrument, int nkeys, int norderbys)
Definition: indexam.c:256
void index_close(Relation relation, LOCKMODE lockmode)
Definition: indexam.c:177
void index_endscan(IndexScanDesc scan)
Definition: indexam.c:382
Relation index_open(Oid relationId, LOCKMODE lockmode)
Definition: indexam.c:133
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition: indexam.c:356
long val
Definition: informix.c:689
#define INJECTION_POINT(name)
int j
Definition: isn.c:78
int i
Definition: isn.c:77
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
IndexTupleData * IndexTuple
Definition: itup.h:53
#define AccessShareLock
Definition: lockdefs.h:36
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:3047
void pfree(void *pointer)
Definition: mcxt.c:2150
void * palloc(Size size)
Definition: mcxt.c:1943
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
Oid GetUserId(void)
Definition: miscinit.c:520
bool IgnoreSystemIndexes
Definition: miscinit.c:81
uint16 OffsetNumber
Definition: off.h:24
#define ACL_SELECT
Definition: parsenodes.h:77
int16 attnum
Definition: pg_attribute.h:74
FormData_pg_index * Form_pg_index
Definition: pg_index.h:70
static char * buf
Definition: pg_test_fsync.c:72
uintptr_t Datum
Definition: postgres.h:69
#define InvalidOid
Definition: postgres_ext.h:35
unsigned int Oid
Definition: postgres_ext.h:30
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1402
#define RelationGetRelid(relation)
Definition: rel.h:516
#define RelationGetRelationName(relation)
Definition: rel.h:550
#define IndexRelationGetNumberOfAttributes(relation)
Definition: rel.h:528
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:535
int check_enable_rls(Oid relid, Oid checkAsUser, bool noError)
Definition: rls.c:52
@ RLS_ENABLED
Definition: rls.h:45
char * pg_get_indexdef_columns(Oid indexrelid, bool pretty)
Definition: ruleutils.c:1235
ScanDirection
Definition: sdir.h:25
@ ForwardScanDirection
Definition: sdir.h:28
ScanKeyData * ScanKey
Definition: skey.h:75
Snapshot GetCatalogSnapshot(Oid relid)
Definition: snapmgr.c:373
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:853
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:811
#define InvalidSnapshot
Definition: snapshot.h:119
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
struct ScanKeyData * keyData
Definition: relscan.h:141
struct ScanKeyData * orderByData
Definition: relscan.h:142
HeapTuple xs_hitup
Definition: relscan.h:169
bool ignore_killed_tuples
Definition: relscan.h:148
IndexFetchTableData * xs_heapfetch
Definition: relscan.h:175
int numberOfOrderBys
Definition: relscan.h:140
bool xactStartedInRecovery
Definition: relscan.h:149
struct IndexScanInstrumentation * instrument
Definition: relscan.h:159
IndexTuple xs_itup
Definition: relscan.h:167
bool kill_prior_tuple
Definition: relscan.h:147
struct TupleDescData * xs_hitupdesc
Definition: relscan.h:170
struct TupleDescData * xs_itupdesc
Definition: relscan.h:168
Relation indexRelation
Definition: relscan.h:137
struct SnapshotData * xs_snapshot
Definition: relscan.h:138
Relation heapRelation
Definition: relscan.h:136
ItemPointerData t_tid
Definition: itup.h:37
Oid * rd_opcintype
Definition: rel.h:208
Form_pg_index rd_index
Definition: rel.h:192
AttrNumber sk_attno
Definition: skey.h:67
Relation irel
Definition: relscan.h:210
Relation heap_rel
Definition: relscan.h:209
struct SnapshotData * snapshot
Definition: relscan.h:213
struct IndexScanDescData * iscan
Definition: relscan.h:212
struct TupleTableSlot * slot
Definition: relscan.h:214
struct TableScanDescData * scan
Definition: relscan.h:211
TM_IndexStatus * status
Definition: tableam.h:248
int bottomupfreespace
Definition: tableam.h:243
Relation irel
Definition: tableam.h:240
TM_IndexDelete * deltids
Definition: tableam.h:247
BlockNumber iblknum
Definition: tableam.h:241
ItemPointerData tid
Definition: tableam.h:206
bool knowndeletable
Definition: tableam.h:213
bool promising
Definition: tableam.h:216
int16 freespace
Definition: tableam.h:217
OffsetNumber idxoffnum
Definition: tableam.h:212
Definition: regguts.h:323
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:92
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:979
static TableScanDesc table_beginscan_strat(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync)
Definition: tableam.h:894
static bool table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:1015
static bool table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
Definition: tableam.h:1295
static TransactionId table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
Definition: tableam.h:1316
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TTS_IS_BUFFERTUPLE(slot)
Definition: tuptable.h:241
bool TransactionStartedDuringRecovery(void)
Definition: xact.c:1042
bool bsysscan
Definition: xact.c:100
TransactionId CheckXidAlive
Definition: xact.c:99
bool IsInParallelMode(void)
Definition: xact.c:1089