PostgreSQL Source Code git master
multixact.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * multixact.c
4 * PostgreSQL multi-transaction-log manager
5 *
6 * The pg_multixact manager is a pg_xact-like manager that stores an array of
7 * MultiXactMember for each MultiXactId. It is a fundamental part of the
8 * shared-row-lock implementation. Each MultiXactMember is comprised of a
9 * TransactionId and a set of flag bits. The name is a bit historical:
10 * originally, a MultiXactId consisted of more than one TransactionId (except
11 * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
12 * legitimate to have MultiXactIds that only include a single Xid.
13 *
14 * The meaning of the flag bits is opaque to this module, but they are mostly
15 * used in heapam.c to identify lock modes that each of the member transactions
16 * is holding on any given tuple. This module just contains support to store
17 * and retrieve the arrays.
18 *
19 * We use two SLRU areas, one for storing the offsets at which the data
20 * starts for each MultiXactId in the other one. This trick allows us to
21 * store variable length arrays of TransactionIds. (We could alternatively
22 * use one area containing counts and TransactionIds, with valid MultiXactId
23 * values pointing at slots containing counts; but that way seems less robust
24 * since it would get completely confused if someone inquired about a bogus
25 * MultiXactId that pointed to an intermediate slot containing an XID.)
26 *
27 * XLOG interactions: this module generates a record whenever a new OFFSETs or
28 * MEMBERs page is initialized to zeroes, as well as an
29 * XLOG_MULTIXACT_CREATE_ID record whenever a new MultiXactId is defined.
30 * This module ignores the WAL rule "write xlog before data," because it
31 * suffices that actions recording a MultiXactId in a heap xmax do follow that
32 * rule. The only way for the MXID to be referenced from any data page is for
33 * heap_lock_tuple() or heap_update() to have put it there, and each generates
34 * an XLOG record that must follow ours. The normal LSN interlock between the
35 * data page and that XLOG record will ensure that our XLOG record reaches
36 * disk first. If the SLRU members/offsets data reaches disk sooner than the
37 * XLOG records, we do not care; after recovery, no xmax will refer to it. On
38 * the flip side, to ensure that all referenced entries _do_ reach disk, this
39 * module's XLOG records completely rebuild the data entered since the last
40 * checkpoint. We flush and sync all dirty OFFSETs and MEMBERs pages to disk
41 * before each checkpoint is considered complete.
42 *
43 * Like clog.c, and unlike subtrans.c, we have to preserve state across
44 * crashes and ensure that MXID and offset numbering increases monotonically
45 * across a crash. We do this in the same way as it's done for transaction
46 * IDs: the WAL record is guaranteed to contain evidence of every MXID we
47 * could need to worry about, and we just make sure that at the end of
48 * replay, the next-MXID and next-offset counters are at least as large as
49 * anything we saw during replay.
50 *
51 * We are able to remove segments no longer necessary by carefully tracking
52 * each table's used values: during vacuum, any multixact older than a certain
53 * value is removed; the cutoff value is stored in pg_class. The minimum value
54 * across all tables in each database is stored in pg_database, and the global
55 * minimum across all databases is part of pg_control and is kept in shared
56 * memory. Whenever that minimum is advanced, the SLRUs are truncated.
57 *
58 * When new multixactid values are to be created, care is taken that the
59 * counter does not fall within the wraparound horizon considering the global
60 * minimum value.
61 *
62 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
63 * Portions Copyright (c) 1994, Regents of the University of California
64 *
65 * src/backend/access/transam/multixact.c
66 *
67 *-------------------------------------------------------------------------
68 */
69#include "postgres.h"
70
71#include "access/multixact.h"
73#include "access/slru.h"
74#include "access/twophase.h"
76#include "access/xlog.h"
77#include "access/xloginsert.h"
78#include "access/xlogutils.h"
79#include "miscadmin.h"
80#include "pg_trace.h"
81#include "pgstat.h"
83#include "storage/pmsignal.h"
84#include "storage/proc.h"
85#include "storage/procarray.h"
86#include "utils/guc_hooks.h"
88#include "utils/lsyscache.h"
89#include "utils/memutils.h"
90
91
92/*
93 * Thresholds used to keep members disk usage in check when multixids have a
94 * lot of members. When MULTIXACT_MEMBER_LOW_THRESHOLD is reached, vacuum
95 * starts freezing multixids more aggressively, even if the normal multixid
96 * age limits haven't been reached yet.
97 */
98#define MULTIXACT_MEMBER_LOW_THRESHOLD UINT64CONST(2000000000)
99#define MULTIXACT_MEMBER_HIGH_THRESHOLD UINT64CONST(4000000000)
100
/*
 * Return the MultiXactId that follows "multi".  The ID space is circular:
 * stepping forward from MaxMultiXactId wraps around to FirstMultiXactId
 * rather than simply incrementing.
 */
101static inline MultiXactId
103{
104 return multi == MaxMultiXactId ? FirstMultiXactId : multi + 1;
105}
106
/*
 * Return the MultiXactId that precedes "multi".  The ID space is circular:
 * stepping backward from FirstMultiXactId wraps around to MaxMultiXactId
 * rather than simply decrementing.
 */
107static inline MultiXactId
109{
110 return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
111}
112
113/*
114 * Links to shared-memory data structures for MultiXact control
115 */
118
119#define MultiXactOffsetCtl (&MultiXactOffsetCtlData)
120#define MultiXactMemberCtl (&MultiXactMemberCtlData)
121
122/*
123 * MultiXact state shared across all backends. All this state is protected
124 * by MultiXactGenLock. (We also use the SLRU bank locks of MultiXactOffset
125 * and MultiXactMember to guard accesses to the two sets of SLRU buffers.
126 * For concurrency's sake, we avoid holding more than one of these locks at
127 * a time.)
128 */
129typedef struct MultiXactStateData
130{
131 /* next-to-be-assigned MultiXactId */
133
134 /* next-to-be-assigned offset */
136
137 /* Have we completed multixact startup? */
139
140 /*
141 * Oldest multixact that is still potentially referenced by a relation.
142 * Anything older than this should not be consulted. These values are
143 * updated by vacuum.
144 */
147
148 /*
149 * Oldest multixact offset that is potentially referenced by a multixact
150 * referenced by a relation.
151 */
153
154 /* support for anti-wraparound measures */
159
160 /*
161 * Per-backend data starts here. We have two arrays stored in the area
162 * immediately following the MultiXactStateData struct. Each is indexed by
163 * ProcNumber.
164 *
165 * In both arrays, there's one slot for each normal backend
166 * (0..MaxBackends-1), followed by one slot for each of the
167 * max_prepared_xacts prepared transactions.
168 *
169 * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current
170 * transaction(s) could possibly be a member of, or InvalidMultiXactId
171 * when the backend has no live transaction that could possibly be a
172 * member of a MultiXact. Each backend sets its entry to the current
173 * nextMXact counter just before first acquiring a shared lock in a given
174 * transaction, and clears it at transaction end. (This works because only
175 * during or after acquiring a shared lock could an XID possibly become a
176 * member of a MultiXact, and that MultiXact would have to be created
177 * during or after the lock acquisition.)
178 *
179 * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's
180 * current transaction(s) think is potentially live, or InvalidMultiXactId
181 * when not in a transaction or not in a transaction that's paid any
182 * attention to MultiXacts yet. This is computed when first needed in a
183 * given transaction, and cleared at transaction end. We can compute it
184 * as the minimum of the valid OldestMemberMXactId[] entries at the time
185 * we compute it (using nextMXact if none are valid). Each backend is
186 * required not to attempt to access any SLRU data for MultiXactIds older
187 * than its own OldestVisibleMXactId[] setting; this is necessary because
188 * the relevant SLRU data can be concurrently truncated away.
189 *
190 * The oldest valid value among all of the OldestMemberMXactId[] and
191 * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
192 * possible value still having any live member transaction -- OldestMxact.
193 * Any value older than that is typically removed from tuple headers, or
194 * "frozen" via being replaced with a new xmax. VACUUM can sometimes even
195 * remove an individual MultiXact xmax whose value is >= its OldestMxact
196 * cutoff, though typically only when no individual member XID is still
197 * running. See FreezeMultiXactId for full details.
198 *
199 * Whenever VACUUM advances relminmxid, then either its OldestMxact cutoff
200 * or the oldest extant Multi remaining in the table is used as the new
201 * pg_class.relminmxid value (whichever is earlier). The minimum of all
202 * relminmxid values in each database is stored in pg_database.datminmxid.
203 * In turn, the minimum of all of those values is stored in pg_control.
204 * This is used as the truncation point for pg_multixact when unneeded
205 * segments get removed by vac_truncate_clog() during vacuuming.
206 */
209
210/*
211 * Size of OldestMemberMXactId and OldestVisibleMXactId arrays.
212 */
213#define MaxOldestSlot (MaxBackends + max_prepared_xacts)
214
215/* Pointers to the state data in shared memory */
219
220
221/*
222 * Definitions for the backend-local MultiXactId cache.
223 *
224 * We use this cache to store known MultiXacts, so we don't need to go to
225 * SLRU areas every time.
226 *
227 * The cache lasts for the duration of a single transaction, the rationale
228 * for this being that most entries will contain our own TransactionId and
229 * so they will be uninteresting by the time our next transaction starts.
230 * (XXX not clear that this is correct --- other members of the MultiXact
231 * could hang around longer than we did. However, it's not clear what a
232 * better policy for flushing old cache entries would be.) FIXME actually
233 * this is plain wrong now that multixacts may contain update Xids.
234 *
235 * We allocate the cache entries in a memory context that is deleted at
236 * transaction end, so we don't need to do retail freeing of entries.
237 */
238typedef struct mXactCacheEnt
239{
245
246#define MAX_CACHE_ENTRIES 256
249
/*
 * Debug-logging wrappers.  The numeric suffix is the total number of
 * arguments the wrapper takes (2 through 6).  When MULTIXACT_DEBUG is
 * defined, each wrapper forwards its arguments unchanged to elog();
 * otherwise each one expands to nothing, so neither the call nor its
 * argument expressions are compiled in.
 */
250#ifdef MULTIXACT_DEBUG
251#define debug_elog2(a,b) elog(a,b)
252#define debug_elog3(a,b,c) elog(a,b,c)
253#define debug_elog4(a,b,c,d) elog(a,b,c,d)
254#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
255#define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
256#else
257#define debug_elog2(a,b)
258#define debug_elog3(a,b,c)
259#define debug_elog4(a,b,c,d)
260#define debug_elog5(a,b,c,d,e)
261#define debug_elog6(a,b,c,d,e,f)
262#endif
263
264/* internal MultiXactId management */
265static void MultiXactIdSetOldestVisible(void);
266static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
267 int nmembers, MultiXactMember *members);
268static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
269
270/* MultiXact cache management */
271static int mxactMemberComparator(const void *arg1, const void *arg2);
272static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
273static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
274static void mXactCachePut(MultiXactId multi, int nmembers,
275 MultiXactMember *members);
276
277/* management of SLRU infrastructure */
278static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
279static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
280static void ExtendMultiXactOffset(MultiXactId multi);
281static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
282static void SetOldestOffset(void);
283static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
284static void WriteMTruncateXlogRec(Oid oldestMultiDB,
285 MultiXactId startTruncOff,
286 MultiXactId endTruncOff,
287 MultiXactOffset startTruncMemb,
288 MultiXactOffset endTruncMemb);
289
290
291/*
292 * MultiXactIdCreate
293 * Construct a MultiXactId representing two TransactionIds.
294 *
295 * The two XIDs must be different, or be requesting different statuses.
296 *
297 * NB - we don't worry about our local MultiXactId cache here, because that
298 * is handled by the lower-level routines.
299 */
302 TransactionId xid2, MultiXactStatus status2)
303{
304 MultiXactId newMulti;
305 MultiXactMember members[2];
306
309
310 Assert(!TransactionIdEquals(xid1, xid2) || (status1 != status2));
311
312 /* MultiXactIdSetOldestMember() must have been called already. */
314
315 /*
316 * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
317 * are still running. In typical usage, xid2 will be our own XID and the
318 * caller just did a check on xid1, so it'd be wasted effort.
319 */
320
321 members[0].xid = xid1;
322 members[0].status = status1;
323 members[1].xid = xid2;
324 members[1].status = status2;
325
326 newMulti = MultiXactIdCreateFromMembers(2, members);
327
328 debug_elog3(DEBUG2, "Create: %s",
329 mxid_to_string(newMulti, 2, members));
330
331 return newMulti;
332}
333
334/*
335 * MultiXactIdExpand
336 * Add a TransactionId to a pre-existing MultiXactId.
337 *
338 * If the TransactionId is already a member of the passed MultiXactId with the
339 * same status, just return it as-is.
340 *
341 * Note that we do NOT actually modify the membership of a pre-existing
342 * MultiXactId; instead we create a new one. This is necessary to avoid
343 * a race condition against code trying to wait for one MultiXactId to finish;
344 * see notes in heapam.c.
345 *
346 * NB - we don't worry about our local MultiXactId cache here, because that
347 * is handled by the lower-level routines.
348 *
349 * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
350 * one upgraded by pg_upgrade from a cluster older than this feature) are not
351 * passed in.
352 */
355{
356 MultiXactId newMulti;
357 MultiXactMember *members;
358 MultiXactMember *newMembers;
359 int nmembers;
360 int i;
361 int j;
362
365
366 /* MultiXactIdSetOldestMember() must have been called already. */
368
369 debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
370 multi, xid, mxstatus_to_string(status));
371
372 /*
373 * Note: we don't allow for old multis here. The reason is that the only
374 * caller of this function does a check that the multixact is no longer
375 * running.
376 */
377 nmembers = GetMultiXactIdMembers(multi, &members, false, false);
378
379 if (nmembers < 0)
380 {
381 MultiXactMember member;
382
383 /*
384 * The MultiXactId is obsolete. This can only happen if all the
385 * MultiXactId members stop running between the caller checking and
386 * passing it to us. It would be better to return that fact to the
387 * caller, but it would complicate the API and it's unlikely to happen
388 * too often, so just deal with it by creating a singleton MultiXact.
389 */
390 member.xid = xid;
391 member.status = status;
392 newMulti = MultiXactIdCreateFromMembers(1, &member);
393
394 debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
395 multi, newMulti);
396 return newMulti;
397 }
398
399 /*
400 * If the TransactionId is already a member of the MultiXactId with the
401 * same status, just return the existing MultiXactId.
402 */
403 for (i = 0; i < nmembers; i++)
404 {
405 if (TransactionIdEquals(members[i].xid, xid) &&
406 (members[i].status == status))
407 {
408 debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
409 xid, multi);
410 pfree(members);
411 return multi;
412 }
413 }
414
415 /*
416 * Determine which of the members of the MultiXactId are still of
417 * interest. This is any running transaction, and also any transaction
418 * that grabbed something stronger than just a lock and was committed. (An
419 * update that aborted is of no interest here; and having more than one
420 * update Xid in a multixact would cause errors elsewhere.)
421 *
422 * Removing dead members is not just an optimization: freezing of tuples
423 * whose Xmax are multis depends on this behavior.
424 *
425 * Note we have the same race condition here as above: j could be 0 at the
426 * end of the loop.
427 */
428 newMembers = palloc_array(MultiXactMember, nmembers + 1);
429
430 for (i = 0, j = 0; i < nmembers; i++)
431 {
432 if (TransactionIdIsInProgress(members[i].xid) ||
433 (ISUPDATE_from_mxstatus(members[i].status) &&
434 TransactionIdDidCommit(members[i].xid)))
435 {
436 newMembers[j].xid = members[i].xid;
437 newMembers[j++].status = members[i].status;
438 }
439 }
440
441 newMembers[j].xid = xid;
442 newMembers[j++].status = status;
443 newMulti = MultiXactIdCreateFromMembers(j, newMembers);
444
445 pfree(members);
446 pfree(newMembers);
447
448 debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);
449
450 return newMulti;
451}
452
453/*
454 * MultiXactIdIsRunning
455 * Returns whether a MultiXactId is "running".
456 *
457 * We return true if at least one member of the given MultiXactId is still
458 * running. Note that a "false" result is certain not to change,
459 * because it is not legal to add members to an existing MultiXactId.
460 *
461 * Caller is expected to have verified that the multixact does not come from
462 * a pg_upgraded share-locked tuple.
463 */
464bool
465MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
466{
467 MultiXactMember *members;
468 int nmembers;
469 int i;
470
471 debug_elog3(DEBUG2, "IsRunning %u?", multi);
472
473 /*
474 * "false" here means we assume our callers have checked that the given
475 * multi cannot possibly come from a pg_upgraded database.
476 */
477 nmembers = GetMultiXactIdMembers(multi, &members, false, isLockOnly);
478
479 if (nmembers <= 0)
480 {
481 debug_elog2(DEBUG2, "IsRunning: no members");
482 return false;
483 }
484
485 /*
486 * Checking for myself is cheap compared to looking in shared memory;
487 * return true if any live subtransaction of the current top-level
488 * transaction is a member.
489 *
490 * This is not needed for correctness, it's just a fast path.
491 */
492 for (i = 0; i < nmembers; i++)
493 {
494 if (TransactionIdIsCurrentTransactionId(members[i].xid))
495 {
496 debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
497 pfree(members);
498 return true;
499 }
500 }
501
502 /*
503 * This could be made faster by having another entry point in procarray.c,
504 * walking the PGPROC array only once for all the members. But in most
505 * cases nmembers should be small enough that it doesn't much matter.
506 */
507 for (i = 0; i < nmembers; i++)
508 {
509 if (TransactionIdIsInProgress(members[i].xid))
510 {
511 debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
512 i, members[i].xid);
513 pfree(members);
514 return true;
515 }
516 }
517
518 pfree(members);
519
520 debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);
521
522 return false;
523}
524
525/*
526 * MultiXactIdSetOldestMember
527 * Save the oldest MultiXactId this transaction could be a member of.
528 *
529 * We set the OldestMemberMXactId for a given transaction the first time it's
530 * going to do some operation that might require a MultiXactId (tuple lock,
531 * update or delete). We need to do this even if we end up using a
532 * TransactionId instead of a MultiXactId, because there is a chance that
533 * another transaction would add our XID to a MultiXactId.
534 *
535 * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
536 * be called just before doing any such possibly-MultiXactId-able operation.
537 */
538void
540{
542 {
543 MultiXactId nextMXact;
544
545 /*
546 * You might think we don't need to acquire a lock here, since
547 * fetching and storing of TransactionIds is probably atomic, but in
548 * fact we do: suppose we pick up nextMXact and then lose the CPU for
549 * a long time. Someone else could advance nextMXact, and then
550 * another someone else could compute an OldestVisibleMXactId that
551 * would be after the value we are going to store when we get control
552 * back. Which would be wrong.
553 *
554 * Note that a shared lock is sufficient, because it's enough to stop
555 * someone from advancing nextMXact; and nobody else could be trying
556 * to write to our OldestMember entry, only reading (and we assume
557 * storing it is atomic.)
558 */
559 LWLockAcquire(MultiXactGenLock, LW_SHARED);
560
561 nextMXact = MultiXactState->nextMXact;
562
564
565 LWLockRelease(MultiXactGenLock);
566
567 debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u",
568 MyProcNumber, nextMXact);
569 }
570}
571
572/*
573 * MultiXactIdSetOldestVisible
574 * Save the oldest MultiXactId this transaction considers possibly live.
575 *
576 * We set the OldestVisibleMXactId for a given transaction the first time
577 * it's going to inspect any MultiXactId. Once we have set this, we are
578 * guaranteed that SLRU data for MultiXactIds >= our own OldestVisibleMXactId
579 * won't be truncated away.
580 *
581 * The value to set is the oldest of nextMXact and all the valid per-backend
582 * OldestMemberMXactId[] entries. Because of the locking we do, we can be
583 * certain that no subsequent call to MultiXactIdSetOldestMember can set
584 * an OldestMemberMXactId[] entry older than what we compute here. Therefore
585 * there is no live transaction, now or later, that can be a member of any
586 * MultiXactId older than the OldestVisibleMXactId we compute here.
587 */
588static void
590{
592 {
593 MultiXactId oldestMXact;
594 int i;
595
596 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
597
598 oldestMXact = MultiXactState->nextMXact;
599 for (i = 0; i < MaxOldestSlot; i++)
600 {
601 MultiXactId thisoldest = OldestMemberMXactId[i];
602
603 if (MultiXactIdIsValid(thisoldest) &&
604 MultiXactIdPrecedes(thisoldest, oldestMXact))
605 oldestMXact = thisoldest;
606 }
607
608 OldestVisibleMXactId[MyProcNumber] = oldestMXact;
609
610 LWLockRelease(MultiXactGenLock);
611
612 debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u",
613 MyProcNumber, oldestMXact);
614 }
615}
616
617/*
618 * ReadNextMultiXactId
619 * Return the next MultiXactId to be assigned, but don't allocate it
620 */
623{
624 MultiXactId mxid;
625
626 /* XXX we could presumably do this without a lock. */
627 LWLockAcquire(MultiXactGenLock, LW_SHARED);
629 LWLockRelease(MultiXactGenLock);
630
631 return mxid;
632}
633
634/*
635 * ReadMultiXactIdRange
636 * Get the range of IDs that may still be referenced by a relation.
637 */
638void
640{
641 LWLockAcquire(MultiXactGenLock, LW_SHARED);
644 LWLockRelease(MultiXactGenLock);
645}
646
647
648/*
649 * MultiXactIdCreateFromMembers
650 * Make a new MultiXactId from the specified set of members
651 *
652 * Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
653 * given TransactionIds as members. Returns the newly created MultiXactId.
654 *
655 * NB: the passed members[] array will be sorted in-place.
656 */
659{
660 MultiXactId multi;
661 MultiXactOffset offset;
663
664 debug_elog3(DEBUG2, "Create: %s",
665 mxid_to_string(InvalidMultiXactId, nmembers, members));
666
667 /*
668 * See if the same set of members already exists in our cache; if so, just
669 * re-use that MultiXactId. (Note: it might seem that looking in our
670 * cache is insufficient, and we ought to search disk to see if a
671 * duplicate definition already exists. But since we only ever create
672 * MultiXacts containing our own XID, in most cases any such MultiXacts
673 * were in fact created by us, and so will be in our cache. There are
674 * corner cases where someone else added us to a MultiXact without our
675 * knowledge, but it's not worth checking for.)
676 */
677 multi = mXactCacheGetBySet(nmembers, members);
678 if (MultiXactIdIsValid(multi))
679 {
680 debug_elog2(DEBUG2, "Create: in cache!");
681 return multi;
682 }
683
684 /* Verify that there is at most one update Xid among the given members. */
685 {
686 int i;
687 bool has_update = false;
688
689 for (i = 0; i < nmembers; i++)
690 {
691 if (ISUPDATE_from_mxstatus(members[i].status))
692 {
693 if (has_update)
694 elog(ERROR, "new multixact has more than one updating member: %s",
695 mxid_to_string(InvalidMultiXactId, nmembers, members));
696 has_update = true;
697 }
698 }
699 }
700
701 /* Load the injection point before entering the critical section */
702 INJECTION_POINT_LOAD("multixact-create-from-members");
703
704 /*
705 * Assign the MXID and offsets range to use, and make sure there is space
706 * in the OFFSETs and MEMBERs files. NB: this routine does
707 * START_CRIT_SECTION().
708 *
709 * Note: unlike MultiXactIdCreate and MultiXactIdExpand, we do not check
710 * that we've called MultiXactIdSetOldestMember here. This is because
711 * this routine is used in some places to create new MultiXactIds of which
712 * the current backend is not a member, notably during freezing of multis
713 * in vacuum. During vacuum, in particular, it would be unacceptable to
714 * keep OldestMulti set, in case it runs for long.
715 */
716 multi = GetNewMultiXactId(nmembers, &offset);
717
718 INJECTION_POINT_CACHED("multixact-create-from-members", NULL);
719
720 /* Make an XLOG entry describing the new MXID. */
721 xlrec.mid = multi;
722 xlrec.moff = offset;
723 xlrec.nmembers = nmembers;
724
725 /*
726 * XXX Note: there's a lot of padding space in MultiXactMember. We could
727 * find a more compact representation of this Xlog record -- perhaps all
728 * the status flags in one XLogRecData, then all the xids in another one?
729 * Not clear that it's worth the trouble though.
730 */
733 XLogRegisterData(members, nmembers * sizeof(MultiXactMember));
734
735 (void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
736
737 /* Now enter the information into the OFFSETs and MEMBERs logs */
738 RecordNewMultiXact(multi, offset, nmembers, members);
739
740 /* Done with critical section */
742
743 /* Store the new MultiXactId in the local cache, too */
744 mXactCachePut(multi, nmembers, members);
745
746 debug_elog2(DEBUG2, "Create: all done");
747
748 return multi;
749}
750
751/*
752 * RecordNewMultiXact
753 * Write info about a new multixact into the offsets and members files
754 *
755 * This is broken out of MultiXactIdCreateFromMembers so that xlog replay can
756 * use it.
757 */
758static void
760 int nmembers, MultiXactMember *members)
761{
762 int64 pageno;
763 int64 prev_pageno;
764 int entryno;
765 int slotno;
766 MultiXactOffset *offptr;
768 int64 next_pageno;
769 int next_entryno;
770 MultiXactOffset *next_offptr;
771 MultiXactOffset next_offset;
772 LWLock *lock;
773 LWLock *prevlock = NULL;
774
775 /* position of this multixid in the offsets SLRU area */
776 pageno = MultiXactIdToOffsetPage(multi);
777 entryno = MultiXactIdToOffsetEntry(multi);
778
779 /* position of the next multixid */
780 next = NextMultiXactId(multi);
781 next_pageno = MultiXactIdToOffsetPage(next);
782 next_entryno = MultiXactIdToOffsetEntry(next);
783
784 /*
785 * Set the starting offset of this multixid's members.
786 *
787 * In the common case, it was already set by the previous
788 * RecordNewMultiXact call, as this was the next multixid of the previous
789 * multixid. But if multiple backends are generating multixids
790 * concurrently, we might race ahead and get called before the previous
791 * multixid.
792 */
795
796 /*
797 * Note: we pass the MultiXactId to SimpleLruReadPage as the "transaction"
798 * to complain about if there's any I/O error. This is kinda bogus, but
799 * since the errors will always give the full pathname, it should be clear
800 * enough that a MultiXactId is really involved. Perhaps someday we'll
801 * take the trouble to generalize the slru.c error reporting code.
802 */
803 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
804 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
805 offptr += entryno;
806
807 if (*offptr != offset)
808 {
809 /* should already be set to the correct value, or not at all */
810 Assert(*offptr == 0);
811 *offptr = offset;
812 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
813 }
814
815 /*
816 * Set the next multixid's offset to the end of this multixid's members.
817 */
818 if (next_pageno == pageno)
819 {
820 next_offptr = offptr + 1;
821 }
822 else
823 {
824 /* must be the first entry on the page */
825 Assert(next_entryno == 0 || next == FirstMultiXactId);
826
827 /* Swap the lock for a lock on the next page */
828 LWLockRelease(lock);
829 lock = SimpleLruGetBankLock(MultiXactOffsetCtl, next_pageno);
831
832 slotno = SimpleLruReadPage(MultiXactOffsetCtl, next_pageno, true, next);
833 next_offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
834 next_offptr += next_entryno;
835 }
836
837 /* Like in GetNewMultiXactId(), skip over offset 0 */
838 next_offset = offset + nmembers;
839 if (next_offset == 0)
840 next_offset = 1;
841 if (*next_offptr != next_offset)
842 {
843 /* should already be set to the correct value, or not at all */
844 Assert(*next_offptr == 0);
845 *next_offptr = next_offset;
846 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
847 }
848
849 /* Release MultiXactOffset SLRU lock. */
850 LWLockRelease(lock);
851
852 prev_pageno = -1;
853
854 for (int i = 0; i < nmembers; i++, offset++)
855 {
856 TransactionId *memberptr;
857 uint32 *flagsptr;
858 uint32 flagsval;
859 int bshift;
860 int flagsoff;
861 int memberoff;
862
863 Assert(members[i].status <= MultiXactStatusUpdate);
864
865 pageno = MXOffsetToMemberPage(offset);
866 memberoff = MXOffsetToMemberOffset(offset);
867 flagsoff = MXOffsetToFlagsOffset(offset);
868 bshift = MXOffsetToFlagsBitShift(offset);
869
870 if (pageno != prev_pageno)
871 {
872 /*
873 * The MultiXactMember SLRU page has changed.  If the new page
874 * falls into a different SLRU bank, release the old bank's lock
875 * and acquire the lock on the new bank.
876 */
878 if (lock != prevlock)
879 {
880 if (prevlock != NULL)
881 LWLockRelease(prevlock);
882
884 prevlock = lock;
885 }
886 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
887 prev_pageno = pageno;
888 }
889
890 memberptr = (TransactionId *)
891 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
892
893 *memberptr = members[i].xid;
894
895 flagsptr = (uint32 *)
896 (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
897
898 flagsval = *flagsptr;
899 flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
900 flagsval |= (members[i].status << bshift);
901 *flagsptr = flagsval;
902
903 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
904 }
905
906 if (prevlock != NULL)
907 LWLockRelease(prevlock);
908}
909
910/*
911 * GetNewMultiXactId
912 * Get the next MultiXactId.
913 *
914 * Also, reserve the needed amount of space in the "members" area. The
915 * starting offset of the reserved space is returned in *offset.
916 *
917 * This may generate XLOG records for expansion of the offsets and/or members
918 * files. Unfortunately, we have to do that while holding MultiXactGenLock
919 * to avoid race conditions --- the XLOG record for zeroing a page must appear
920 * before any backend can possibly try to store data in that page!
921 *
922 * We start a critical section before advancing the shared counters. The
923 * caller must end the critical section after writing SLRU data.
924 */
925static MultiXactId
926GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
927{
928 MultiXactId result;
929 MultiXactOffset nextOffset;
930
931 debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
932
933 /* safety check, we should never get this far in a HS standby */
934 if (RecoveryInProgress())
935 elog(ERROR, "cannot assign MultiXactIds during recovery");
936
937 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
938
939 /* Assign the MXID */
940 result = MultiXactState->nextMXact;
941
942 /*----------
943 * Check to see if it's safe to assign another MultiXactId. This protects
944 * against catastrophic data loss due to multixact wraparound. The basic
945 * rules are:
946 *
947 * If we're past multiVacLimit or the safe threshold for member storage
948 * space, or we don't know what the safe threshold for member storage is,
949 * start trying to force autovacuum cycles.
950 * If we're past multiWarnLimit, start issuing warnings.
951 * If we're past multiStopLimit, refuse to create new MultiXactIds.
952 *
953 * Note these are pretty much the same protections in GetNewTransactionId.
954 *----------
955 */
957 {
958 /*
959 * For safety's sake, we release MultiXactGenLock while sending
960 * signals, warnings, etc. This is not so much because we care about
961 * preserving concurrency in this situation, as to avoid any
962 * possibility of deadlock while doing get_database_name(). First,
963 * copy all the shared values we'll need in this path.
964 */
965 MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
966 MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
967 MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
968 Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
969
970 LWLockRelease(MultiXactGenLock);
971
972 if (IsUnderPostmaster &&
973 !MultiXactIdPrecedes(result, multiStopLimit))
974 {
975 char *oldest_datname = get_database_name(oldest_datoid);
976
977 /*
978 * Immediately kick autovacuum into action as we're already in
979 * ERROR territory.
980 */
982
983 /* complain even if that DB has disappeared */
984 if (oldest_datname)
986 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
987 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
988 oldest_datname),
989 errhint("Execute a database-wide VACUUM in that database.\n"
990 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
991 else
993 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
994 errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
995 oldest_datoid),
996 errhint("Execute a database-wide VACUUM in that database.\n"
997 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
998 }
999
1000 /*
1001 * To avoid swamping the postmaster with signals, we issue the autovac
1002 * request only once per 64K multis generated. This still gives
1003 * plenty of chances before we get into real trouble.
1004 */
1005 if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
1007
1008 if (!MultiXactIdPrecedes(result, multiWarnLimit))
1009 {
1010 char *oldest_datname = get_database_name(oldest_datoid);
1011
1012 /* complain even if that DB has disappeared */
1013 if (oldest_datname)
1015 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
1016 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
1017 multiWrapLimit - result,
1018 oldest_datname,
1019 multiWrapLimit - result),
1020 errhint("Execute a database-wide VACUUM in that database.\n"
1021 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1022 else
1024 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
1025 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
1026 multiWrapLimit - result,
1027 oldest_datoid,
1028 multiWrapLimit - result),
1029 errhint("Execute a database-wide VACUUM in that database.\n"
1030 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1031 }
1032
1033 /* Re-acquire lock and start over */
1034 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1035 result = MultiXactState->nextMXact;
1036 }
1037
1038 /*
1039 * Make sure there is room for the next MXID in the file. Assigning this
1040 * MXID sets the next MXID's offset already.
1041 */
1043
1044 /*
1045 * Reserve the members space, similarly to above.
1046 */
1047 nextOffset = MultiXactState->nextOffset;
1048
1049 /*
1050 * Offsets are 64-bit integers and will never wrap around. Firstly, it
1051 * would take an unrealistic amount of time and resources to consume 2^64
1052 * offsets. Secondly, multixid creation is WAL-logged, so you would run
1053 * out of LSNs before reaching offset wraparound. Nevertheless, check for
1054 * wraparound as a sanity check.
1055 */
1056 if (nextOffset + nmembers < nextOffset)
1057 ereport(ERROR,
1058 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1059 errmsg("MultiXact members would wrap around")));
1060 *offset = nextOffset;
1061
1062 ExtendMultiXactMember(nextOffset, nmembers);
1063
1064 /*
1065 * Critical section from here until caller has written the data into the
1066 * just-reserved SLRU space; we don't want to error out with a partly
1067 * written MultiXact structure. (In particular, failing to write our
1068 * start offset after advancing nextMXact would effectively corrupt the
1069 * previous MultiXact.)
1070 */
1072
1073 /*
1074 * Advance counters. As in GetNewTransactionId(), this must not happen
1075 * until after file extension has succeeded!
1076 */
1078 MultiXactState->nextOffset += nmembers;
1079
1080 LWLockRelease(MultiXactGenLock);
1081
1082 debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64,
1083 result, *offset);
1084 return result;
1085}
1086
1087/*
1088 * GetMultiXactIdMembers
1089 * Return the set of MultiXactMembers that make up a MultiXactId
1090 *
1091 * Return value is the number of members found, or -1 if there are none,
1092 * and *members is set to a newly palloc'ed array of members. It's the
1093 * caller's responsibility to free it when done with it.
1094 *
1095 * from_pgupgrade must be passed as true if and only if the multixact
1096 * corresponds to a value from a tuple that was locked in a 9.2-or-older
1097 * installation and later pg_upgrade'd (that is, the infomask is
1098 * HEAP_LOCKED_UPGRADED). In this case, we know for certain that no members
1099 * can still be running, so we return -1 just like for an empty multixact
1100 * without any further checking. It would be wrong to try to resolve such a
1101 * multixact: either the multixact is within the current valid multixact
1102 * range, in which case the returned result would be bogus, or outside that
1103 * range, in which case an error would be raised.
1104 *
1105 * In all other cases, the passed multixact must be within the known valid
1106 * range, that is, greater than or equal to oldestMultiXactId, and less than
1107 * nextMXact. Otherwise, an error is raised.
1108 *
1109 * isLockOnly must be set to true if caller is certain that the given multi
1110 * is used only to lock tuples; can be false without loss of correctness,
1111 * but passing a true means we can return quickly without checking for
1112 * old updates.
1113 */
1114int
1116 bool from_pgupgrade, bool isLockOnly)
1117{
1118 int64 pageno;
1119 int64 prev_pageno;
1120 int entryno;
1121 int slotno;
1122 MultiXactOffset *offptr;
1123 MultiXactOffset offset;
1124 MultiXactOffset nextMXOffset;
1125 int length;
1126 MultiXactId oldestMXact;
1127 MultiXactId nextMXact;
1128 MultiXactMember *ptr;
1129 LWLock *lock;
1130
1131 debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
1132
1133 if (!MultiXactIdIsValid(multi) || from_pgupgrade)
1134 {
1135 *members = NULL;
1136 return -1;
1137 }
1138
1139 /* See if the MultiXactId is in the local cache */
1140 length = mXactCacheGetById(multi, members);
1141 if (length >= 0)
1142 {
1143 debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
1144 mxid_to_string(multi, length, *members));
1145 return length;
1146 }
1147
1148 /* Set our OldestVisibleMXactId[] entry if we didn't already */
1150
1151 /*
1152 * If we know the multi is used only for locking and not for updates, then
1153 * we can skip checking if the value is older than our oldest visible
1154 * multi. It cannot possibly still be running.
1155 */
1156 if (isLockOnly &&
1158 {
1159 debug_elog2(DEBUG2, "GetMembers: a locker-only multi is too old");
1160 *members = NULL;
1161 return -1;
1162 }
1163
1164 /*
1165 * We check known limits on MultiXact before resorting to the SLRU area.
1166 *
1167 * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1168 * useful; it has already been removed, or will be removed shortly, by
1169 * truncation. If one is passed, an error is raised.
1170 *
1171 * Also, an ID >= nextMXact shouldn't ever be seen here; if it is seen, it
1172 * implies undetected ID wraparound has occurred. This raises a hard
1173 * error.
1174 *
1175 * Shared lock is enough here since we aren't modifying any global state.
1176 * Acquire it just long enough to grab the current counter values.
1177 */
1178 LWLockAcquire(MultiXactGenLock, LW_SHARED);
1179
1180 oldestMXact = MultiXactState->oldestMultiXactId;
1181 nextMXact = MultiXactState->nextMXact;
1182
1183 LWLockRelease(MultiXactGenLock);
1184
1185 if (MultiXactIdPrecedes(multi, oldestMXact))
1186 ereport(ERROR,
1187 (errcode(ERRCODE_INTERNAL_ERROR),
1188 errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
1189 multi)));
1190
1191 if (!MultiXactIdPrecedes(multi, nextMXact))
1192 ereport(ERROR,
1193 (errcode(ERRCODE_INTERNAL_ERROR),
1194 errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
1195 multi)));
1196
1197 /*
1198 * Find out the offset at which we need to start reading MultiXactMembers
1199 * and the number of members in the multixact. We determine the latter as
1200 * the difference between this multixact's starting offset and the next
1201 * one's.
1202 */
1203 pageno = MultiXactIdToOffsetPage(multi);
1204 entryno = MultiXactIdToOffsetEntry(multi);
1205
1206 /* Acquire the bank lock for the page we need. */
1209
1210 /* read this multi's offset */
1211 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, multi);
1212 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1213 offptr += entryno;
1214 offset = *offptr;
1215
1216 if (offset == 0)
1217 ereport(ERROR,
1219 errmsg("MultiXact %u has invalid offset", multi)));
1220
1221 /* read next multi's offset */
1222 {
1223 MultiXactId tmpMXact;
1224
1225 /* handle wraparound if needed */
1226 tmpMXact = NextMultiXactId(multi);
1227
1228 prev_pageno = pageno;
1229
1230 pageno = MultiXactIdToOffsetPage(tmpMXact);
1231 entryno = MultiXactIdToOffsetEntry(tmpMXact);
1232
1233 if (pageno != prev_pageno)
1234 {
1235 LWLock *newlock;
1236
1237 /*
1238 * Since we're going to access a different SLRU page, if this page
1239 * falls under a different bank, release the old bank's lock and
1240 * acquire the lock of the new bank.
1241 */
1242 newlock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
1243 if (newlock != lock)
1244 {
1245 LWLockRelease(lock);
1246 LWLockAcquire(newlock, LW_EXCLUSIVE);
1247 lock = newlock;
1248 }
1249 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, tmpMXact);
1250 }
1251
1252 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1253 offptr += entryno;
1254 nextMXOffset = *offptr;
1255 }
1256
1257 LWLockRelease(lock);
1258 lock = NULL;
1259
1260 /* Sanity check the next offset */
1261 if (nextMXOffset == 0)
1262 ereport(ERROR,
1264 errmsg("MultiXact %u has invalid next offset", multi)));
1265 if (nextMXOffset == offset)
1266 ereport(ERROR,
1268 errmsg("MultiXact %u with offset (%" PRIu64 ") has zero members",
1269 multi, offset)));
1270 if (nextMXOffset < offset)
1271 ereport(ERROR,
1273 errmsg("MultiXact %u has offset (%" PRIu64 ") greater than its next offset (%" PRIu64 ")",
1274 multi, offset, nextMXOffset)));
1275 if (nextMXOffset - offset > INT32_MAX)
1276 ereport(ERROR,
1278 errmsg("MultiXact %u has too many members (%" PRIu64 ")",
1279 multi, nextMXOffset - offset)));
1280 length = nextMXOffset - offset;
1281
1282 /* read the members */
1283 ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
1284 prev_pageno = -1;
1285 for (int i = 0; i < length; i++, offset++)
1286 {
1287 TransactionId *xactptr;
1288 uint32 *flagsptr;
1289 int flagsoff;
1290 int bshift;
1291 int memberoff;
1292
1293 pageno = MXOffsetToMemberPage(offset);
1294 memberoff = MXOffsetToMemberOffset(offset);
1295
1296 if (pageno != prev_pageno)
1297 {
1298 LWLock *newlock;
1299
1300 /*
1301 * Since we're going to access a different SLRU page, if this page
1302 * falls under a different bank, release the old bank's lock and
1303 * acquire the lock of the new bank.
1304 */
1305 newlock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
1306 if (newlock != lock)
1307 {
1308 if (lock)
1309 LWLockRelease(lock);
1310 LWLockAcquire(newlock, LW_EXCLUSIVE);
1311 lock = newlock;
1312 }
1313
1314 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, multi);
1315 prev_pageno = pageno;
1316 }
1317
1318 xactptr = (TransactionId *)
1319 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1320 Assert(TransactionIdIsValid(*xactptr));
1321
1322 flagsoff = MXOffsetToFlagsOffset(offset);
1323 bshift = MXOffsetToFlagsBitShift(offset);
1324 flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
1325
1326 ptr[i].xid = *xactptr;
1327 ptr[i].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
1328 }
1329
1330 LWLockRelease(lock);
1331
1332 /*
1333 * Copy the result into the local cache.
1334 */
1335 mXactCachePut(multi, length, ptr);
1336
1337 debug_elog3(DEBUG2, "GetMembers: no cache for %s",
1338 mxid_to_string(multi, length, ptr));
1339 *members = ptr;
1340 return length;
1341}
1342
1343/*
1344 * mxactMemberComparator
1345 * qsort comparison function for MultiXactMember
1346 *
1347 * We can't use wraparound comparison for XIDs because that does not respect
1348 * the triangle inequality! Any old sort order will do.
1349 */
1350static int
1351mxactMemberComparator(const void *arg1, const void *arg2)
1352{
1353 MultiXactMember member1 = *(const MultiXactMember *) arg1;
1354 MultiXactMember member2 = *(const MultiXactMember *) arg2;
1355
1356 if (member1.xid > member2.xid)
1357 return 1;
1358 if (member1.xid < member2.xid)
1359 return -1;
1360 if (member1.status > member2.status)
1361 return 1;
1362 if (member1.status < member2.status)
1363 return -1;
1364 return 0;
1365}
1366
1367/*
1368 * mXactCacheGetBySet
1369 * returns a MultiXactId from the cache based on the set of
1370 * TransactionIds that compose it, or InvalidMultiXactId if
1371 * none matches.
1372 *
1373 * This is helpful, for example, if two transactions want to lock a huge
1374 * table. By using the cache, the second will use the same MultiXactId
1375 * for the majority of tuples, thus keeping MultiXactId usage low (saving
1376 * both I/O and wraparound issues).
1377 *
1378 * NB: the passed members array will be sorted in-place.
1379 */
1380static MultiXactId
1382{
1383 dlist_iter iter;
1384
1385 debug_elog3(DEBUG2, "CacheGet: looking for %s",
1386 mxid_to_string(InvalidMultiXactId, nmembers, members));
1387
1388 /* sort the array so comparison is easy */
1389 qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1390
1392 {
1394 iter.cur);
1395
1396 if (entry->nmembers != nmembers)
1397 continue;
1398
1399 /*
1400 * We assume the cache entries are sorted, and that the unused bits in
1401 * "status" are zeroed.
1402 */
1403 if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
1404 {
1405 debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
1407 return entry->multi;
1408 }
1409 }
1410
1411 debug_elog2(DEBUG2, "CacheGet: not found :-(");
1412 return InvalidMultiXactId;
1413}
1414
1415/*
1416 * mXactCacheGetById
1417 * returns the composing MultiXactMember set from the cache for a
1418 * given MultiXactId, if present.
1419 *
1420 * If successful, *members is set to the address of a palloc'd copy of the
1421 * MultiXactMember set. Return value is number of members, or -1 on failure.
1422 */
1423static int
1425{
1426 dlist_iter iter;
1427
1428 debug_elog3(DEBUG2, "CacheGet: looking for %u", multi);
1429
1431 {
1433 iter.cur);
1434
1435 if (entry->multi == multi)
1436 {
1437 MultiXactMember *ptr;
1438 Size size;
1439
1440 size = sizeof(MultiXactMember) * entry->nmembers;
1441 ptr = (MultiXactMember *) palloc(size);
1442
1443 memcpy(ptr, entry->members, size);
1444
1445 debug_elog3(DEBUG2, "CacheGet: found %s",
1446 mxid_to_string(multi,
1447 entry->nmembers,
1448 entry->members));
1449
1450 /*
1451 * Note we modify the list while not using a modifiable iterator.
1452 * This is acceptable only because we exit the iteration
1453 * immediately afterwards.
1454 */
1456
1457 *members = ptr;
1458 return entry->nmembers;
1459 }
1460 }
1461
1462 debug_elog2(DEBUG2, "CacheGet: not found");
1463 return -1;
1464}
1465
1466/*
1467 * mXactCachePut
1468 * Add a new MultiXactId and its composing set into the local cache.
1469 */
1470static void
1471mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
1472{
1473 mXactCacheEnt *entry;
1474
1475 debug_elog3(DEBUG2, "CachePut: storing %s",
1476 mxid_to_string(multi, nmembers, members));
1477
1478 if (MXactContext == NULL)
1479 {
1480 /* The cache only lives as long as the current transaction */
1481 debug_elog2(DEBUG2, "CachePut: initializing memory context");
1483 "MultiXact cache context",
1485 }
1486
1487 entry = (mXactCacheEnt *)
1489 offsetof(mXactCacheEnt, members) +
1490 nmembers * sizeof(MultiXactMember));
1491
1492 entry->multi = multi;
1493 entry->nmembers = nmembers;
1494 memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
1495
1496 /* mXactCacheGetBySet assumes the entries are sorted, so sort them */
1497 qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
1498
1499 dclist_push_head(&MXactCache, &entry->node);
1501 {
1502 dlist_node *node;
1503
1506
1507 entry = dclist_container(mXactCacheEnt, node, node);
1508 debug_elog3(DEBUG2, "CachePut: pruning cached multi %u",
1509 entry->multi);
1510
1511 pfree(entry);
1512 }
1513}
1514
1515char *
1517{
1518 switch (status)
1519 {
1521 return "keysh";
1523 return "sh";
1525 return "fornokeyupd";
1527 return "forupd";
1529 return "nokeyupd";
1531 return "upd";
1532 default:
1533 elog(ERROR, "unrecognized multixact status %d", status);
1534 return "";
1535 }
1536}
1537
1538char *
1539mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
1540{
1541 static char *str = NULL;
1543 int i;
1544
1545 if (str != NULL)
1546 pfree(str);
1547
1549
1550 appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
1551 mxstatus_to_string(members[0].status));
1552
1553 for (i = 1; i < nmembers; i++)
1554 appendStringInfo(&buf, ", %u (%s)", members[i].xid,
1555 mxstatus_to_string(members[i].status));
1556
1559 pfree(buf.data);
1560 return str;
1561}
1562
1563/*
1564 * AtEOXact_MultiXact
1565 * Handle transaction end for MultiXact
1566 *
1567 * This is called at top transaction commit or abort (we don't care which).
1568 */
1569void
1571{
1572 /*
1573 * Reset our OldestMemberMXactId and OldestVisibleMXactId values, both of
1574 * which should only be valid while within a transaction.
1575 *
1576 * We assume that storing a MultiXactId is atomic and so we need not take
1577 * MultiXactGenLock to do this.
1578 */
1581
1582 /*
1583 * Discard the local MultiXactId cache. Since MXactContext was created as
1584 * a child of TopTransactionContext, we needn't delete it explicitly.
1585 */
1586 MXactContext = NULL;
1588}
1589
1590/*
1591 * AtPrepare_MultiXact
1592 * Save multixact state at 2PC transaction prepare
1593 *
1594 * In this phase, we only store our OldestMemberMXactId value in the two-phase
1595 * state file.
1596 */
1597void
1599{
1601
1602 if (MultiXactIdIsValid(myOldestMember))
1604 &myOldestMember, sizeof(MultiXactId));
1605}
1606
1607/*
1608 * PostPrepare_MultiXact
1609 * Clean up after successful PREPARE TRANSACTION
1610 */
1611void
1613{
1614 MultiXactId myOldestMember;
1615
1616 /*
1617 * Transfer our OldestMemberMXactId value to the slot reserved for the
1618 * prepared transaction.
1619 */
1620 myOldestMember = OldestMemberMXactId[MyProcNumber];
1621 if (MultiXactIdIsValid(myOldestMember))
1622 {
1623 ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1624
1625 /*
1626 * Even though storing MultiXactId is atomic, acquire lock to make
1627 * sure others see both changes, not just the reset of the slot of the
1628 * current backend. Using a volatile pointer might suffice, but this
1629 * isn't a hot spot.
1630 */
1631 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1632
1633 OldestMemberMXactId[dummyProcNumber] = myOldestMember;
1635
1636 LWLockRelease(MultiXactGenLock);
1637 }
1638
1639 /*
1640 * We don't need to transfer OldestVisibleMXactId value, because the
1641 * transaction is not going to be looking at any more multixacts once it's
1642 * prepared.
1643 *
1644 * We assume that storing a MultiXactId is atomic and so we need not take
1645 * MultiXactGenLock to do this.
1646 */
1648
1649 /*
1650 * Discard the local MultiXactId cache like in AtEOXact_MultiXact.
1651 */
1652 MXactContext = NULL;
1654}
1655
1656/*
1657 * multixact_twophase_recover
1658 * Recover the state of a prepared transaction at startup
1659 */
1660void
1662 void *recdata, uint32 len)
1663{
1664 ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
1665 MultiXactId oldestMember;
1666
1667 /*
1668 * Get the oldest member MultiXactId from the state file record, and set it in the
1669 * OldestMemberMXactId slot reserved for this prepared transaction.
1670 */
1671 Assert(len == sizeof(MultiXactId));
1672 oldestMember = *((MultiXactId *) recdata);
1673
1674 OldestMemberMXactId[dummyProcNumber] = oldestMember;
1675}
1676
1677/*
1678 * multixact_twophase_postcommit
1679 * Similar to AtEOXact_MultiXact but for COMMIT PREPARED
1680 */
1681void
1683 void *recdata, uint32 len)
1684{
1685 ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true);
1686
1687 Assert(len == sizeof(MultiXactId));
1688
1689 OldestMemberMXactId[dummyProcNumber] = InvalidMultiXactId;
1690}
1691
1692/*
1693 * multixact_twophase_postabort
1694 * This is actually just the same as the COMMIT case.
1695 */
1696void
1698 void *recdata, uint32 len)
1699{
1700 multixact_twophase_postcommit(fxid, info, recdata, len);
1701}
1702
1703/*
1704 * Initialization of shared memory for MultiXact. We use two SLRU areas,
1705 * thus double memory. Also, reserve space for the shared MultiXactState
1706 * struct and the per-backend MultiXactId arrays (two of those, too).
1707 */
1708Size
1710{
1711 Size size;
1712
1713 /* We need 2*MaxOldestSlot perBackendXactIds[] entries */
1714#define SHARED_MULTIXACT_STATE_SIZE \
1715 add_size(offsetof(MultiXactStateData, perBackendXactIds), \
1716 mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot))
1717
1721
1722 return size;
1723}
1724
1725void
1727{
1728 bool found;
1729
1730 debug_elog2(DEBUG2, "Shared Memory Init for MultiXact");
1731
1734
1736 "multixact_offset", multixact_offset_buffers, 0,
1737 "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER,
1738 LWTRANCHE_MULTIXACTOFFSET_SLRU,
1740 false);
1743 "multixact_member", multixact_member_buffers, 0,
1744 "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
1745 LWTRANCHE_MULTIXACTMEMBER_SLRU,
1747 true);
1748 /* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
1749
1750 /* Initialize our shared state struct */
1751 MultiXactState = ShmemInitStruct("Shared MultiXact State",
1753 &found);
1754 if (!IsUnderPostmaster)
1755 {
1756 Assert(!found);
1757
1758 /* Make sure we zero out the per-backend state */
1760 }
1761 else
1762 Assert(found);
1763
1764 /*
1765 * Set up array pointers.
1766 */
1769}
1770
1771/*
1772 * GUC check_hook for multixact_offset_buffers
1773 */
1774bool
1776{
1777 return check_slru_buffers("multixact_offset_buffers", newval);
1778}
1779
1780/*
1781 * GUC check_hook for multixact_member_buffers
1782 */
1783bool
1785{
1786 return check_slru_buffers("multixact_member_buffers", newval);
1787}
1788
1789/*
1790 * This func must be called ONCE on system install. It creates the initial
1791 * MultiXact segments. (The MultiXacts directories are assumed to have been
1792 * created by initdb, and MultiXactShmemInit must have been called already.)
1793 */
1794void
1796{
1797 /* Zero the initial pages and flush them to disk */
1800}
1801
1802/*
1803 * This must be called ONCE during postmaster or standalone-backend startup.
1804 *
1805 * StartupXLOG has already established nextMXact/nextOffset by calling
1806 * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
1807 * info from pg_control and/or MultiXactAdvanceOldest, but we haven't yet
1808 * replayed WAL.
1809 */
1810void
1812{
1815 int64 pageno;
1816
1817 /*
1818 * Initialize offset's idea of the latest page number.
1819 */
1820 pageno = MultiXactIdToOffsetPage(multi);
1821 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1822 pageno);
1823
1824 /*
1825 * Initialize member's idea of the latest page number.
1826 */
1827 pageno = MXOffsetToMemberPage(offset);
1828 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1829 pageno);
1830}
1831
1832/*
1833 * This must be called ONCE at the end of startup/recovery.
1834 */
1835void
1837{
1838 MultiXactId nextMXact;
1839 MultiXactOffset offset;
1840 MultiXactId oldestMXact;
1841 Oid oldestMXactDB;
1842 int64 pageno;
1843 int entryno;
1844 int flagsoff;
1845
1846 LWLockAcquire(MultiXactGenLock, LW_SHARED);
1847 nextMXact = MultiXactState->nextMXact;
1848 offset = MultiXactState->nextOffset;
1849 oldestMXact = MultiXactState->oldestMultiXactId;
1850 oldestMXactDB = MultiXactState->oldestMultiXactDB;
1851 LWLockRelease(MultiXactGenLock);
1852
1853 /* Clean up offsets state */
1854
1855 /*
1856 * (Re-)Initialize our idea of the latest page number for offsets.
1857 */
1858 pageno = MultiXactIdToOffsetPage(nextMXact);
1859 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
1860 pageno);
1861
1862 /*
1863 * Set the offset of nextMXact on the offsets page. This is normally done
1864 * in RecordNewMultiXact() of the previous multixact, but let's be sure
1865 * the next page exists, if the nextMXact was reset with pg_resetwal for
1866 * example.
1867 *
1868 * Zero out the remainder of the page. See notes in TrimCLOG() for
1869 * background. Unlike CLOG, some WAL record covers every pg_multixact
1870 * SLRU mutation. Since, also unlike CLOG, we ignore the WAL rule "write
1871 * xlog before data," nextMXact successors may carry obsolete, nonzero
1872 * offset values.
1873 */
1874 entryno = MultiXactIdToOffsetEntry(nextMXact);
1875 {
1876 int slotno;
1877 MultiXactOffset *offptr;
1879
1881 if (entryno == 0 || nextMXact == FirstMultiXactId)
1882 slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
1883 else
1884 slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
1885 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
1886 offptr += entryno;
1887
1888 *offptr = offset;
1889 if (entryno != 0 && (entryno + 1) * sizeof(MultiXactOffset) != BLCKSZ)
1890 MemSet(offptr + 1, 0, BLCKSZ - (entryno + 1) * sizeof(MultiXactOffset));
1891
1892 MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
1893 LWLockRelease(lock);
1894 }
1895
1896 /*
1897 * And the same for members.
1898 *
1899 * (Re-)Initialize our idea of the latest page number for members.
1900 */
1901 pageno = MXOffsetToMemberPage(offset);
1902 pg_atomic_write_u64(&MultiXactMemberCtl->shared->latest_page_number,
1903 pageno);
1904
1905 /*
1906 * Zero out the remainder of the current members page. See notes in
1907 * TrimCLOG() for motivation.
1908 */
1909 flagsoff = MXOffsetToFlagsOffset(offset);
1910 if (flagsoff != 0)
1911 {
1912 int slotno;
1913 TransactionId *xidptr;
1914 int memberoff;
1916
1918 memberoff = MXOffsetToMemberOffset(offset);
1919 slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
1920 xidptr = (TransactionId *)
1921 (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
1922
1923 MemSet(xidptr, 0, BLCKSZ - memberoff);
1924
1925 /*
1926 * Note: we don't need to zero out the flag bits in the remaining
1927 * members of the current group, because they are always reset before
1928 * writing.
1929 */
1930
1931 MultiXactMemberCtl->shared->page_dirty[slotno] = true;
1932 LWLockRelease(lock);
1933 }
1934
1935 /* signal that we're officially up */
1936 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
1938 LWLockRelease(MultiXactGenLock);
1939
1940 /* Now compute how far away the next multixid wraparound is. */
1941 SetMultiXactIdLimit(oldestMXact, oldestMXactDB);
1942}
1943
1944/*
1945 * Get the MultiXact data to save in a checkpoint record
1946 */
1947void
1949 MultiXactId *nextMulti,
1950 MultiXactOffset *nextMultiOffset,
1951 MultiXactId *oldestMulti,
1952 Oid *oldestMultiDB)
1953{
1954 LWLockAcquire(MultiXactGenLock, LW_SHARED);
1955 *nextMulti = MultiXactState->nextMXact;
1956 *nextMultiOffset = MultiXactState->nextOffset;
1957 *oldestMulti = MultiXactState->oldestMultiXactId;
1958 *oldestMultiDB = MultiXactState->oldestMultiXactDB;
1959 LWLockRelease(MultiXactGenLock);
1960
1962 "MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
1963 *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
1964}
1965
1966/*
1967 * Perform a checkpoint --- either during shutdown, or on-the-fly
1968 */
1969void
1971{
1972 TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_START(true);
1973
1974 /*
1975 * Write dirty MultiXact pages to disk. This may result in sync requests
1976 * queued for later handling by ProcessSyncRequests(), as part of the
1977 * checkpoint.
1978 */
1981
1982 TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
1983}
1984
1985/*
1986 * Set the next-to-be-assigned MultiXactId and offset
1987 *
1988 * This is used when we can determine the correct next ID/offset exactly
1989 * from a checkpoint record. Although this is only called during bootstrap
1990 * and XLog replay, we take the lock in case any hot-standby backends are
1991 * examining the values.
1992 */
1993void
1995 MultiXactOffset nextMultiOffset)
1996{
1997 Assert(MultiXactIdIsValid(nextMulti));
1998 debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
1999 nextMulti, nextMultiOffset);
2000
2001 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2002 MultiXactState->nextMXact = nextMulti;
2003 MultiXactState->nextOffset = nextMultiOffset;
2004 LWLockRelease(MultiXactGenLock);
2005}
2006
2007/*
2008 * Determine the last safe MultiXactId to allocate given the currently oldest
2009 * datminmxid (ie, the oldest MultiXactId that might exist in any database
2010 * of our cluster), and the OID of the (or a) database with that value.
2011 *
2012 * This also updates MultiXactState->oldestOffset, by looking up the offset of
2013 * MultiXactState->oldestMultiXactId.
2014 */
2015void
2016SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
2017{
2018 MultiXactId multiVacLimit;
2019 MultiXactId multiWarnLimit;
2020 MultiXactId multiStopLimit;
2021 MultiXactId multiWrapLimit;
2022 MultiXactId curMulti;
2023
2024 Assert(MultiXactIdIsValid(oldest_datminmxid));
2025
2026 /*
2027 * We pretend that a wrap will happen halfway through the multixact ID
2028 * space, but that's not really true, because multixacts wrap differently
2029 * from transaction IDs.
2030 */
2031 multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
2032 if (multiWrapLimit < FirstMultiXactId)
2033 multiWrapLimit += FirstMultiXactId;
2034
2035 /*
2036 * We'll refuse to continue assigning MultiXactIds once we get within 3M
2037 * multi of data loss. See SetTransactionIdLimit.
2038 */
2039 multiStopLimit = multiWrapLimit - 3000000;
2040 if (multiStopLimit < FirstMultiXactId)
2041 multiStopLimit -= FirstMultiXactId;
2042
2043 /*
2044 * We'll start complaining loudly when we get within 40M multis of data
2045 * loss. This is kind of arbitrary, but if you let your gas gauge get
2046 * down to 2% of full, would you be looking for the next gas station? We
2047 * need to be fairly liberal about this number because there are lots of
2048 * scenarios where most transactions are done by automatic clients that
2049 * won't pay attention to warnings. (No, we're not gonna make this
2050 * configurable. If you know enough to configure it, you know enough to
2051 * not get in this kind of trouble in the first place.)
2052 */
2053 multiWarnLimit = multiWrapLimit - 40000000;
2054 if (multiWarnLimit < FirstMultiXactId)
2055 multiWarnLimit -= FirstMultiXactId;
2056
2057 /*
2058 * We'll start trying to force autovacuums when oldest_datminmxid gets to
2059 * be more than autovacuum_multixact_freeze_max_age mxids old.
2060 *
2061 * Note: autovacuum_multixact_freeze_max_age is a PGC_POSTMASTER parameter
2062 * so that we don't have to worry about dealing with on-the-fly changes in
2063 * its value. See SetTransactionIdLimit.
2064 */
2065 multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age;
2066 if (multiVacLimit < FirstMultiXactId)
2067 multiVacLimit += FirstMultiXactId;
2068
2069 /* Grab lock for just long enough to set the new limit values */
2070 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2071 MultiXactState->oldestMultiXactId = oldest_datminmxid;
2072 MultiXactState->oldestMultiXactDB = oldest_datoid;
2073 MultiXactState->multiVacLimit = multiVacLimit;
2074 MultiXactState->multiWarnLimit = multiWarnLimit;
2075 MultiXactState->multiStopLimit = multiStopLimit;
2076 MultiXactState->multiWrapLimit = multiWrapLimit;
2077 curMulti = MultiXactState->nextMXact;
2078 LWLockRelease(MultiXactGenLock);
2079
2080 /* Log the info */
2082 (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
2083 multiWrapLimit, oldest_datoid)));
2084
2085 /*
2086 * Computing the actual limits is only possible once the data directory is
2087 * in a consistent state. There's no need to compute the limits while
2088 * still replaying WAL - no decisions about new multis are made even
2089 * though multixact creations might be replayed. So we'll only do further
2090 * checks after TrimMultiXact() has been called.
2091 */
2093 return;
2094
2096
2097 /*
2098 * Offsets are 64 bits wide and never wrap around, so we don't need to
2099 * consider them for emergency autovacuum purposes. But now that we're in
2100 * a consistent state, determine MultiXactState->oldestOffset. It will be
2101 * used to adjust the freezing cutoff, to keep the offsets disk usage in
2102 * check.
2103 */
2105
2106 /*
2107 * If past the autovacuum force point, immediately signal an autovac
2108 * request. The reason for this is that autovac only processes one
2109 * database per invocation. Once it's finished cleaning up the oldest
2110 * database, it'll call here, and we'll signal the postmaster to start
2111 * another iteration immediately if there are still any old databases.
2112 */
2113 if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
2115
2116 /* Give an immediate warning if past the wrap warn point */
2117 if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
2118 {
2119 char *oldest_datname;
2120
2121 /*
2122 * We can be called when not inside a transaction, for example during
2123 * StartupXLOG(). In such a case we cannot do database access, so we
2124 * must just report the oldest DB's OID.
2125 *
2126 * Note: it's also possible that get_database_name fails and returns
2127 * NULL, for example because the database just got dropped. We'll
2128 * still warn, even though the warning might now be unnecessary.
2129 */
2130 if (IsTransactionState())
2131 oldest_datname = get_database_name(oldest_datoid);
2132 else
2133 oldest_datname = NULL;
2134
2135 if (oldest_datname)
2137 (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
2138 "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
2139 multiWrapLimit - curMulti,
2140 oldest_datname,
2141 multiWrapLimit - curMulti),
2142 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2143 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2144 else
2146 (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
2147 "database with OID %u must be vacuumed before %u more MultiXactIds are used",
2148 multiWrapLimit - curMulti,
2149 oldest_datoid,
2150 multiWrapLimit - curMulti),
2151 errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
2152 "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
2153 }
2154}
2155
2156/*
2157 * Ensure the next-to-be-assigned MultiXactId is at least minMulti,
2158 * and similarly nextOffset is at least minMultiOffset.
2159 *
2160 * This is used when we can determine minimum safe values from an XLog
2161 * record (either an on-line checkpoint or an mxact creation log entry).
2162 * Although this is only called during XLog replay, we take the lock in case
2163 * any hot-standby backends are examining the values.
2164 */
2165void
2167 MultiXactOffset minMultiOffset)
2168{
2169 Assert(MultiXactIdIsValid(minMulti));
2170
2171 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2173 {
2174 debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
2175 MultiXactState->nextMXact = minMulti;
2176 }
2177 if (MultiXactState->nextOffset < minMultiOffset)
2178 {
2179 debug_elog3(DEBUG2, "MultiXact: setting next offset to %" PRIu64,
2180 minMultiOffset);
2181 MultiXactState->nextOffset = minMultiOffset;
2182 }
2183 LWLockRelease(MultiXactGenLock);
2184}
2185
2186/*
2187 * Update our oldestMultiXactId value, but only if it's more recent than what
2188 * we had.
2189 *
2190 * This may only be called during WAL replay.
2191 */
2192void
2193MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
2194{
2196
2198 SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
2199}
2200
2201/*
2202 * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
2203 *
2204 * NB: this is called while holding MultiXactGenLock. We want it to be very
2205 * fast most of the time; even when it's not so fast, no actual I/O need
2206 * happen unless we're forced to write out a dirty log or xlog page to make
2207 * room in shared memory.
2208 */
2209static void
2211{
2212 int64 pageno;
2213 LWLock *lock;
2214
2215 /*
2216 * No work except at first MultiXactId of a page. But beware: just after
2217 * wraparound, the first MultiXactId of page zero is FirstMultiXactId.
2218 */
2219 if (MultiXactIdToOffsetEntry(multi) != 0 &&
2220 multi != FirstMultiXactId)
2221 return;
2222
2223 pageno = MultiXactIdToOffsetPage(multi);
2225
2227
2228 /* Zero the page and make a WAL entry about it */
2231 pageno);
2232
2233 LWLockRelease(lock);
2234}
2235
2236/*
2237 * Make sure that MultiXactMember has room for the members of a newly-
2238 * allocated MultiXactId.
2239 *
2240 * Like the above routine, this is called while holding MultiXactGenLock;
2241 * same comments apply.
2242 */
2243static void
2245{
2246 /*
2247 * It's possible that the members span more than one page of the members
2248 * file, so we loop to ensure we consider each page. The coding is not
2249 * optimal if the members span several pages, but that seems unusual
2250 * enough to not worry much about.
2251 */
2252 while (nmembers > 0)
2253 {
2254 int flagsoff;
2255 int flagsbit;
2257
2258 /*
2259 * Only zero when at first entry of a page.
2260 */
2261 flagsoff = MXOffsetToFlagsOffset(offset);
2262 flagsbit = MXOffsetToFlagsBitShift(offset);
2263 if (flagsoff == 0 && flagsbit == 0)
2264 {
2265 int64 pageno;
2266 LWLock *lock;
2267
2268 pageno = MXOffsetToMemberPage(offset);
2270
2272
2273 /* Zero the page and make a WAL entry about it */
2275 XLogSimpleInsertInt64(RM_MULTIXACT_ID,
2277
2278 LWLockRelease(lock);
2279 }
2280
2281 /* Compute the number of items till end of current page. */
2283
2284 /*
2285 * Advance to next page. OK if nmembers goes negative.
2286 */
2287 nmembers -= difference;
2288 offset += difference;
2289 }
2290}
2291
2292/*
2293 * GetOldestMultiXactId
2294 *
2295 * Return the oldest MultiXactId that's possibly still seen as live by
2296 * any running transaction. Older ones might still exist on disk, but they no
2297 * longer have any running member transaction.
2298 *
2299 * It's not safe to truncate MultiXact SLRU segments on the value returned by
2300 * this function; however, it can be set as the new relminmxid for any table
2301 * that VACUUM knows has no remaining MXIDs < the same value. It is only safe
2302 * to truncate SLRUs when no table can possibly still have a referencing MXID.
2303 */
2306{
2307 MultiXactId oldestMXact;
2308 int i;
2309
2310 /*
2311 * This is the oldest valid value among all the OldestMemberMXactId[] and
2312 * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
2313 */
2314 LWLockAcquire(MultiXactGenLock, LW_SHARED);
2315 oldestMXact = MultiXactState->nextMXact;
2316 for (i = 0; i < MaxOldestSlot; i++)
2317 {
2318 MultiXactId thisoldest;
2319
2320 thisoldest = OldestMemberMXactId[i];
2321 if (MultiXactIdIsValid(thisoldest) &&
2322 MultiXactIdPrecedes(thisoldest, oldestMXact))
2323 oldestMXact = thisoldest;
2324 thisoldest = OldestVisibleMXactId[i];
2325 if (MultiXactIdIsValid(thisoldest) &&
2326 MultiXactIdPrecedes(thisoldest, oldestMXact))
2327 oldestMXact = thisoldest;
2328 }
2329
2330 LWLockRelease(MultiXactGenLock);
2331
2332 return oldestMXact;
2333}
2334
2335/*
2336 * Calculate the oldest member offset and install it in MultiXactState, where
2337 * it can be used to adjust multixid freezing cutoffs.
2338 */
2339static void
2341{
2342 MultiXactId oldestMultiXactId;
2343 MultiXactId nextMXact;
2344 MultiXactOffset oldestOffset = 0; /* placate compiler */
2345 MultiXactOffset nextOffset;
2346 bool oldestOffsetKnown = false;
2347
2348 /*
2349 * NB: Have to prevent concurrent truncation, we might otherwise try to
2350 * lookup an oldestMulti that's concurrently getting truncated away.
2351 */
2352 LWLockAcquire(MultiXactTruncationLock, LW_SHARED);
2353
2354 /* Read relevant fields from shared memory. */
2355 LWLockAcquire(MultiXactGenLock, LW_SHARED);
2356 oldestMultiXactId = MultiXactState->oldestMultiXactId;
2357 nextMXact = MultiXactState->nextMXact;
2358 nextOffset = MultiXactState->nextOffset;
2360 LWLockRelease(MultiXactGenLock);
2361
2362 /*
2363 * Determine the offset of the oldest multixact. Normally, we can read
2364 * the offset from the multixact itself, but there's an important special
2365 * case: if there are no multixacts in existence at all, oldestMXact
2366 * obviously can't point to one. It will instead point to the multixact
2367 * ID that will be assigned the next time one is needed.
2368 */
2369 if (oldestMultiXactId == nextMXact)
2370 {
2371 /*
2372 * When the next multixact gets created, it will be stored at the next
2373 * offset.
2374 */
2375 oldestOffset = nextOffset;
2376 oldestOffsetKnown = true;
2377 }
2378 else
2379 {
2380 /*
2381 * Look up the offset at which the oldest existing multixact's members
2382 * are stored. If we cannot find it, be careful not to fail, and
2383 * leave oldestOffset unchanged. oldestOffset is initialized to zero
2384 * at system startup, which prevents truncating members until a proper
2385 * value is calculated.
2386 *
2387 * (We had bugs in early releases of PostgreSQL 9.3.X and 9.4.X where
2388 * the supposedly-earliest multixact might not really exist. Those
2389 * should be long gone by now, so this should not fail, but let's
2390 * still be defensive.)
2391 */
2392 oldestOffsetKnown =
2393 find_multixact_start(oldestMultiXactId, &oldestOffset);
2394
2395 if (oldestOffsetKnown)
2397 (errmsg_internal("oldest MultiXactId member is at offset %" PRIu64,
2398 oldestOffset)));
2399 else
2400 ereport(LOG,
2401 (errmsg("MultiXact member truncation is disabled because oldest checkpointed MultiXact %u does not exist on disk",
2402 oldestMultiXactId)));
2403 }
2404
2405 LWLockRelease(MultiXactTruncationLock);
2406
2407 /* Install the computed value */
2408 if (oldestOffsetKnown)
2409 {
2410 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2411 MultiXactState->oldestOffset = oldestOffset;
2412 LWLockRelease(MultiXactGenLock);
2413 }
2414}
2415
2416/*
2417 * Find the starting offset of the given MultiXactId.
2418 *
2419 * Returns false if the file containing the multi does not exist on disk.
2420 * Otherwise, returns true and sets *result to the starting member offset.
2421 *
2422 * This function does not prevent concurrent truncation, so if that's
2423 * required, the caller has to protect against that.
2424 */
2425static bool
2427{
2428 MultiXactOffset offset;
2429 int64 pageno;
2430 int entryno;
2431 int slotno;
2432 MultiXactOffset *offptr;
2433
2435
2436 pageno = MultiXactIdToOffsetPage(multi);
2437 entryno = MultiXactIdToOffsetEntry(multi);
2438
2439 /*
2440 * Write out dirty data, so PhysicalPageExists can work correctly.
2441 */
2444
2446 return false;
2447
2448 /* lock is acquired by SimpleLruReadPage_ReadOnly */
2449 slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, multi);
2450 offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
2451 offptr += entryno;
2452 offset = *offptr;
2454
2455 *result = offset;
2456 return true;
2457}
2458
2459/*
2460 * GetMultiXactInfo
2461 *
2462 * Returns information about the current MultiXact state, as of:
2463 * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId)
2464 * nextOffset: Next-to-be-assigned offset
2465 * oldestMultiXactId: Oldest MultiXact ID still in use
2466 * oldestOffset: Oldest offset still in use
2467 */
2468void
2469GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *nextOffset,
2470 MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
2471{
2472 MultiXactId nextMultiXactId;
2473
2474 LWLockAcquire(MultiXactGenLock, LW_SHARED);
2475 *nextOffset = MultiXactState->nextOffset;
2476 *oldestMultiXactId = MultiXactState->oldestMultiXactId;
2477 nextMultiXactId = MultiXactState->nextMXact;
2478 *oldestOffset = MultiXactState->oldestOffset;
2479 LWLockRelease(MultiXactGenLock);
2480
2481 *multixacts = nextMultiXactId - *oldestMultiXactId;
2482}
2483
2484/*
2485 * Multixact members can be removed once the multixacts that refer to them
2486 * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
2487 * vacuum_multixact_freeze_table_age work together to make sure we never have
2488 * too many multixacts; we hope that, at least under normal circumstances,
2489 * this will also be sufficient to keep us from using too many offsets.
2490 * However, if the average multixact has many members, we might accumulate a
2491 * large amount of members, consuming disk space, while still using few enough
2492 * multixids that the multixid limits fail to trigger relminmxid advancement
2493 * by VACUUM.
2494 *
2495 * To prevent that, if the members space usage exceeds a threshold
2496 * (MULTIXACT_MEMBER_LOW_THRESHOLD), we effectively reduce
2497 * autovacuum_multixact_freeze_max_age to a value just less than the number of
2498 * multixacts in use. We hope that this will quickly trigger autovacuuming on
2499 * the table or tables with the oldest relminmxid, thus allowing datminmxid
2500 * values to advance and removing some members.
2501 *
2502 * As the amount of the member space in use grows, we become more aggressive
2503 * in clamping this value. That not only causes autovacuum to ramp up, but
2504 * also makes any manual vacuums the user issues more aggressive. This
2505 * happens because vacuum_get_cutoffs() will clamp the freeze table and the
2506 * minimum freeze age cutoffs based on the effective
2507 * autovacuum_multixact_freeze_max_age this function returns. At the extreme,
2508 * when the members usage reaches MULTIXACT_MEMBER_HIGH_THRESHOLD, we clamp
2509 * freeze_max_age to zero, and every vacuum of any table will freeze every
2510 * multixact.
2511 */
2512int
2514{
2515 uint32 multixacts;
2516 uint32 victim_multixacts;
2517 double fraction;
2518 int result;
2519 MultiXactId oldestMultiXactId;
2520 MultiXactOffset oldestOffset;
2521 MultiXactOffset nextOffset;
2522 uint64 members;
2523
2524 /* Read the current offsets and multixact usage. */
2525 GetMultiXactInfo(&multixacts, &nextOffset, &oldestMultiXactId, &oldestOffset);
2526 members = nextOffset - oldestOffset;
2527
2528 /* If member space utilization is low, no special action is required. */
2529 if (members <= MULTIXACT_MEMBER_LOW_THRESHOLD)
2531
2532 /*
2533 * Compute a target for relminmxid advancement. The number of multixacts
2534 * we try to eliminate from the system is based on how far we are past
2535 * MULTIXACT_MEMBER_LOW_THRESHOLD.
2536 *
2537 * The way this formula works is that when members is exactly at the low
2538 * threshold, fraction = 0.0, and we set freeze_max_age equal to
2539 * mxid_age(oldestMultiXactId). As members grows further, towards the
2540 * high threshold, fraction grows linearly from 0.0 to 1.0, and the result
2541 * shrinks from mxid_age(oldestMultiXactId) to 0. Beyond the high
2542 * threshold, fraction > 1.0 and the result is clamped to 0.
2543 */
2544 fraction = (double) (members - MULTIXACT_MEMBER_LOW_THRESHOLD) /
2546
2547 /* fraction could be > 1.0, but lowest possible freeze age is zero */
2548 if (fraction >= 1.0)
2549 return 0;
2550
2551 victim_multixacts = multixacts * fraction;
2552 result = multixacts - victim_multixacts;
2553
2554 /*
2555 * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
2556 * autovacuum less aggressive than it would otherwise be.
2557 */
2559}
2560
2561typedef struct mxtruncinfo
2562{
2565
2566/*
2567 * SlruScanDirectory callback
2568 * This callback determines the earliest existing page number.
2569 */
2570static bool
2572{
2573 mxtruncinfo *trunc = (mxtruncinfo *) data;
2574
2575 if (trunc->earliestExistingPage == -1 ||
2576 ctl->PagePrecedes(segpage, trunc->earliestExistingPage))
2577 {
2578 trunc->earliestExistingPage = segpage;
2579 }
2580
2581 return false; /* keep going */
2582}
2583
2584
2585/*
2586 * Delete members segments [oldest, newOldest)
2587 */
2588static void
2590{
2592 MXOffsetToMemberPage(newOldestOffset));
2593}
2594
2595/*
2596 * Delete offsets segments [oldest, newOldest)
2597 */
2598static void
2600{
2601 /*
2602 * We step back one multixact to avoid passing a cutoff page that hasn't
2603 * been created yet in the rare case that oldestMulti would be the first
2604 * item on a page and oldestMulti == nextMulti. In that case, if we
2605 * didn't subtract one, we'd trigger SimpleLruTruncate's wraparound
2606 * detection.
2607 */
2610}
2611
2612/*
2613 * Remove all MultiXactOffset and MultiXactMember segments before the oldest
2614 * ones still of interest.
2615 *
2616 * This is only called on a primary as part of vacuum (via
2617 * vac_truncate_clog()). During recovery truncation is done by replaying
2618 * truncation WAL records logged here.
2619 *
2620 * newOldestMulti is the oldest currently required multixact, newOldestMultiDB
2621 * is one of the databases preventing newOldestMulti from increasing.
2622 */
2623void
2624TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
2625{
2626 MultiXactId oldestMulti;
2627 MultiXactId nextMulti;
2628 MultiXactOffset newOldestOffset;
2629 MultiXactOffset oldestOffset;
2630 MultiXactOffset nextOffset;
2631 mxtruncinfo trunc;
2632 MultiXactId earliest;
2633
2636 Assert(MultiXactIdIsValid(newOldestMulti));
2637
2638 /*
2639 * We can only allow one truncation to happen at once. Otherwise parts of
2640 * members might vanish while we're doing lookups or similar. There's no
2641 * need to have an interlock with creating new multis or such, since those
2642 * are constrained by the limits (which only grow, never shrink).
2643 */
2644 LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
2645
2646 LWLockAcquire(MultiXactGenLock, LW_SHARED);
2647 nextMulti = MultiXactState->nextMXact;
2648 nextOffset = MultiXactState->nextOffset;
2649 oldestMulti = MultiXactState->oldestMultiXactId;
2650 LWLockRelease(MultiXactGenLock);
2651
2652 /*
2653 * Make sure to only attempt truncation if there are values to truncate
2654 * away. In normal processing values shouldn't go backwards, but there are
2655 * some corner cases (due to bugs) where that's possible.
2656 */
2657 if (MultiXactIdPrecedesOrEquals(newOldestMulti, oldestMulti))
2658 {
2659 LWLockRelease(MultiXactTruncationLock);
2660 return;
2661 }
2662
2663 /*
2664 * Note we can't just plow ahead with the truncation; it's possible that
2665 * there are no segments to truncate, which is a problem because we are
2666 * going to attempt to read the offsets page to determine where to
2667 * truncate the members SLRU. So we first scan the directory to determine
2668 * the earliest offsets page number that we can read without error.
2669 *
2670 * When nextMXact is less than one segment away from multiWrapLimit,
2671 * SlruScanDirCbFindEarliest can find some early segment other than the
2672 * actual earliest. (MultiXactOffsetPagePrecedes(EARLIEST, LATEST)
2673 * returns false, because not all pairs of entries have the same answer.)
2674 * That can also arise when an earlier truncation attempt failed unlink()
2675 * or returned early from this function. The only consequence is
2676 * returning early, which wastes space that we could have liberated.
2677 *
2678 * NB: It's also possible that the page that oldestMulti is on has already
2679 * been truncated away, and we crashed before updating oldestMulti.
2680 */
2681 trunc.earliestExistingPage = -1;
2684 if (earliest < FirstMultiXactId)
2685 earliest = FirstMultiXactId;
2686
2687 /* If there's nothing to remove, we can bail out early. */
2688 if (MultiXactIdPrecedes(oldestMulti, earliest))
2689 {
2690 LWLockRelease(MultiXactTruncationLock);
2691 return;
2692 }
2693
2694 /*
2695 * First, compute the safe truncation point for MultiXactMember. This is
2696 * the starting offset of the oldest multixact.
2697 *
2698 * Hopefully, find_multixact_start will always work here, because we've
2699 * already checked that it doesn't precede the earliest MultiXact on disk.
2700 * But if it fails, don't truncate anything, and log a message.
2701 */
2702 if (oldestMulti == nextMulti)
2703 {
2704 /* there are NO MultiXacts */
2705 oldestOffset = nextOffset;
2706 }
2707 else if (!find_multixact_start(oldestMulti, &oldestOffset))
2708 {
2709 ereport(LOG,
2710 (errmsg("oldest MultiXact %u not found, earliest MultiXact %u, skipping truncation",
2711 oldestMulti, earliest)));
2712 LWLockRelease(MultiXactTruncationLock);
2713 return;
2714 }
2715
2716 /*
2717 * Secondly compute up to where to truncate. Lookup the corresponding
2718 * member offset for newOldestMulti for that.
2719 */
2720 if (newOldestMulti == nextMulti)
2721 {
2722 /* there are NO MultiXacts */
2723 newOldestOffset = nextOffset;
2724 }
2725 else if (!find_multixact_start(newOldestMulti, &newOldestOffset))
2726 {
2727 ereport(LOG,
2728 (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation",
2729 newOldestMulti)));
2730 LWLockRelease(MultiXactTruncationLock);
2731 return;
2732 }
2733
2734 elog(DEBUG1, "performing multixact truncation: "
2735 "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
2736 "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
2737 oldestMulti, newOldestMulti,
2738 MultiXactIdToOffsetSegment(oldestMulti),
2739 MultiXactIdToOffsetSegment(newOldestMulti),
2740 oldestOffset, newOldestOffset,
2741 MXOffsetToMemberSegment(oldestOffset),
2742 MXOffsetToMemberSegment(newOldestOffset));
2743
2744 /*
2745 * Do truncation, and the WAL logging of the truncation, in a critical
2746 * section. That way offsets/members cannot get out of sync anymore, i.e.
2747 * once consistent the newOldestMulti will always exist in members, even
2748 * if we crashed at the wrong moment.
2749 */
2751
2752 /*
2753 * Prevent checkpoints from being scheduled concurrently. This is critical
2754 * because otherwise a truncation record might not be replayed after a
2755 * crash/basebackup, even though the state of the data directory would
2756 * require it.
2757 */
2760
2761 /* WAL log truncation */
2762 WriteMTruncateXlogRec(newOldestMultiDB,
2763 oldestMulti, newOldestMulti,
2764 oldestOffset, newOldestOffset);
2765
2766 /*
2767 * Update in-memory limits before performing the truncation, while inside
2768 * the critical section: Have to do it before truncation, to prevent
2769 * concurrent lookups of those values. Has to be inside the critical
2770 * section as otherwise a future call to this function would error out,
2771 * while looking up the oldest member in offsets, if our caller crashes
2772 * before updating the limits.
2773 */
2774 LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
2775 MultiXactState->oldestMultiXactId = newOldestMulti;
2776 MultiXactState->oldestMultiXactDB = newOldestMultiDB;
2777 MultiXactState->oldestOffset = newOldestOffset;
2778 LWLockRelease(MultiXactGenLock);
2779
2780 /* First truncate members */
2781 PerformMembersTruncation(oldestOffset, newOldestOffset);
2782
2783 /* Then offsets */
2784 PerformOffsetsTruncation(oldestMulti, newOldestMulti);
2785
2786 MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
2787
2789 LWLockRelease(MultiXactTruncationLock);
2790}
2791
2792/*
2793 * Decide whether a MultiXactOffset page number is "older" for truncation
2794 * purposes. Analogous to CLOGPagePrecedes().
2795 *
2796 * Offsetting the values is optional, because MultiXactIdPrecedes() has
2797 * translational symmetry.
2798 */
2799static bool
2801{
2802 MultiXactId multi1;
2803 MultiXactId multi2;
2804
2805 multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE;
2806 multi1 += FirstMultiXactId + 1;
2807 multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE;
2808 multi2 += FirstMultiXactId + 1;
2809
2810 return (MultiXactIdPrecedes(multi1, multi2) &&
2811 MultiXactIdPrecedes(multi1,
2812 multi2 + MULTIXACT_OFFSETS_PER_PAGE - 1));
2813}
2814
2815/*
2816 * Decide whether a MultiXactMember page number is "older" for truncation
2817 * purposes. There is no "invalid offset number" and members never wrap
2818 * around, so use the numbers verbatim.
2819 */
2820static bool
2822{
2823 return page1 < page2;
2824}
2825
2826/*
2827 * Decide which of two MultiXactIds is earlier.
2828 *
2829 * XXX do we need to do something special for InvalidMultiXactId?
2830 * (Doesn't look like it.)
2831 */
2832bool
2834{
2835 int32 diff = (int32) (multi1 - multi2);
2836
2837 return (diff < 0);
2838}
2839
2840/*
2841 * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2?
2842 *
2843 * XXX do we need to do something special for InvalidMultiXactId?
2844 * (Doesn't look like it.)
2845 */
2846bool
2848{
2849 int32 diff = (int32) (multi1 - multi2);
2850
2851 return (diff <= 0);
2852}
2853
2854
2855/*
2856 * Write a TRUNCATE xlog record
2857 *
2858 * We must flush the xlog record to disk before returning --- see notes in
2859 * TruncateCLOG().
2860 */
2861static void
2863 MultiXactId startTruncOff, MultiXactId endTruncOff,
2864 MultiXactOffset startTruncMemb, MultiXactOffset endTruncMemb)
2865{
2866 XLogRecPtr recptr;
2868
2869 xlrec.oldestMultiDB = oldestMultiDB;
2870
2871 xlrec.startTruncOff = startTruncOff;
2872 xlrec.endTruncOff = endTruncOff;
2873
2874 xlrec.startTruncMemb = startTruncMemb;
2875 xlrec.endTruncMemb = endTruncMemb;
2876
2879 recptr = XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_TRUNCATE_ID);
2880 XLogFlush(recptr);
2881}
2882
2883/*
2884 * MULTIXACT resource manager's routines
2885 */
2886void
2888{
2889 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2890
2891 /* Backup blocks are not used in multixact records */
2893
2894 if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
2895 {
2896 int64 pageno;
2897
2898 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
2900 }
2901 else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
2902 {
2903 int64 pageno;
2904
2905 memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
2907 }
2908 else if (info == XLOG_MULTIXACT_CREATE_ID)
2909 {
2910 xl_multixact_create *xlrec =
2912 TransactionId max_xid;
2913 int i;
2914
2915 /* Store the data back into the SLRU files */
2916 RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
2917 xlrec->members);
2918
2919 /* Make sure nextMXact/nextOffset are beyond what this record has */
2921 xlrec->moff + xlrec->nmembers);
2922
2923 /*
2924 * Make sure nextXid is beyond any XID mentioned in the record. This
2925 * should be unnecessary, since any XID found here ought to have other
2926 * evidence in the XLOG, but let's be safe.
2927 */
2928 max_xid = XLogRecGetXid(record);
2929 for (i = 0; i < xlrec->nmembers; i++)
2930 {
2931 if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
2932 max_xid = xlrec->members[i].xid;
2933 }
2934
2936 }
2937 else if (info == XLOG_MULTIXACT_TRUNCATE_ID)
2938 {
2940 int64 pageno;
2941
2942 memcpy(&xlrec, XLogRecGetData(record),
2944
2945 elog(DEBUG1, "replaying multixact truncation: "
2946 "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
2947 "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
2948 xlrec.startTruncOff, xlrec.endTruncOff,
2951 xlrec.startTruncMemb, xlrec.endTruncMemb,
2954
2955 /* should not be required, but more than cheap enough */
2956 LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
2957
2958 /*
2959 * Advance the horizon values, so they're current at the end of
2960 * recovery.
2961 */
2963
2965
2966 /*
2967 * During XLOG replay, latest_page_number isn't necessarily set up
2968 * yet; insert a suitable value to bypass the sanity test in
2969 * SimpleLruTruncate.
2970 */
2971 pageno = MultiXactIdToOffsetPage(xlrec.endTruncOff);
2972 pg_atomic_write_u64(&MultiXactOffsetCtl->shared->latest_page_number,
2973 pageno);
2975
2976 LWLockRelease(MultiXactTruncationLock);
2977 }
2978 else
2979 elog(PANIC, "multixact_redo: unknown op code %u", info);
2980}
2981
2982/*
2983 * Entrypoint for sync.c to sync offsets files.
2984 */
2985int
2986multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
2987{
2988 return SlruSyncFileTag(MultiXactOffsetCtl, ftag, path);
2989}
2990
2991/*
2992 * Entrypoint for sync.c to sync members files.
2993 */
2994int
2995multixactmemberssyncfiletag(const FileTag *ftag, char *path)
2996{
2997 return SlruSyncFileTag(MultiXactMemberCtl, ftag, path);
2998}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:485
int autovacuum_multixact_freeze_max_age
Definition: autovacuum.c:130
static int32 next
Definition: blutils.c:225
#define Min(x, y)
Definition: c.h:1003
uint8_t uint8
Definition: c.h:550
int64_t int64
Definition: c.h:549
TransactionId MultiXactId
Definition: c.h:682
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:486
uint64 MultiXactOffset
Definition: c.h:684
int32_t int32
Definition: c.h:548
uint64_t uint64
Definition: c.h:553
uint16_t uint16
Definition: c.h:551
uint32_t uint32
Definition: c.h:552
#define MemSet(start, val, len)
Definition: c.h:1019
uint32 TransactionId
Definition: c.h:672
size_t Size
Definition: c.h:625
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1193
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1170
int errhint(const char *fmt,...)
Definition: elog.c:1330
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define LOG
Definition: elog.h:31
#define WARNING
Definition: elog.h:36
#define DEBUG2
Definition: elog.h:29
#define PANIC
Definition: elog.h:42
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define ereport(elevel,...)
Definition: elog.h:150
#define palloc_array(type, count)
Definition: fe_memutils.h:76
Datum difference(PG_FUNCTION_ARGS)
int multixact_offset_buffers
Definition: globals.c:163
ProcNumber MyProcNumber
Definition: globals.c:90
bool IsUnderPostmaster
Definition: globals.c:120
int multixact_member_buffers
Definition: globals.c:162
#define newval
GucSource
Definition: guc.h:112
Assert(PointerIsAligned(start, uint64))
const char * str
#define dclist_container(type, membername, ptr)
Definition: ilist.h:947
static uint32 dclist_count(const dclist_head *head)
Definition: ilist.h:932
static void dclist_move_head(dclist_head *head, dlist_node *node)
Definition: ilist.h:808
static dlist_node * dclist_tail_node(dclist_head *head)
Definition: ilist.h:920
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition: ilist.h:763
#define DCLIST_STATIC_INIT(name)
Definition: ilist.h:282
static void dclist_push_head(dclist_head *head, dlist_node *node)
Definition: ilist.h:693
static void dclist_init(dclist_head *head)
Definition: ilist.h:671
#define dclist_foreach(iter, lhead)
Definition: ilist.h:970
#define INJECTION_POINT_CACHED(name, arg)
#define INJECTION_POINT_LOAD(name)
int j
Definition: isn.c:78
int i
Definition: isn.c:77
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
char * get_database_name(Oid dbid)
Definition: lsyscache.c:1242
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1178
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1898
@ LW_SHARED
Definition: lwlock.h:113
@ LW_EXCLUSIVE
Definition: lwlock.h:112
char * MemoryContextStrdup(MemoryContext context, const char *string)
Definition: mcxt.c:1768
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1232
MemoryContext TopTransactionContext
Definition: mcxt.c:171
void pfree(void *pointer)
Definition: mcxt.c:1616
MemoryContext TopMemoryContext
Definition: mcxt.c:166
void * palloc(Size size)
Definition: mcxt.c:1387
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:170
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId startTruncOff, MultiXactId endTruncOff, MultiXactOffset startTruncMemb, MultiXactOffset endTruncMemb)
Definition: multixact.c:2862
static MultiXactId PreviousMultiXactId(MultiXactId multi)
Definition: multixact.c:108
static SlruCtlData MultiXactOffsetCtlData
Definition: multixact.c:116
void MultiXactShmemInit(void)
Definition: multixact.c:1726
static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2)
Definition: multixact.c:2821
static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
Definition: multixact.c:926
static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
Definition: multixact.c:1424
MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
Definition: multixact.c:354
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
Definition: multixact.c:2244
void ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
Definition: multixact.c:639
static void PerformOffsetsTruncation(MultiXactId oldestMulti, MultiXactId newOldestMulti)
Definition: multixact.c:2599
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:2833
char * mxstatus_to_string(MultiXactStatus status)
Definition: multixact.c:1516
void multixact_redo(XLogReaderState *record)
Definition: multixact.c:2887
void multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition: multixact.c:1682
#define debug_elog5(a, b, c, d, e)
Definition: multixact.c:260
static void MultiXactIdSetOldestVisible(void)
Definition: multixact.c:589
int multixactoffsetssyncfiletag(const FileTag *ftag, char *path)
Definition: multixact.c:2986
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result)
Definition: multixact.c:2426
void PostPrepare_MultiXact(FullTransactionId fxid)
Definition: multixact.c:1612
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:1994
#define MultiXactMemberCtl
Definition: multixact.c:120
static bool SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data)
Definition: multixact.c:2571
void AtPrepare_MultiXact(void)
Definition: multixact.c:1598
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:2847
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2193
static void mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition: multixact.c:1471
void GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *nextOffset, MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
Definition: multixact.c:2469
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition: multixact.c:465
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:539
static void PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
Definition: multixact.c:2589
#define MULTIXACT_MEMBER_LOW_THRESHOLD
Definition: multixact.c:98
static MemoryContext MXactContext
Definition: multixact.c:248
#define SHARED_MULTIXACT_STATE_SIZE
static MultiXactId * OldestVisibleMXactId
Definition: multixact.c:218
struct mxtruncinfo mxtruncinfo
static int mxactMemberComparator(const void *arg1, const void *arg2)
Definition: multixact.c:1351
struct MultiXactStateData MultiXactStateData
static void ExtendMultiXactOffset(MultiXactId multi)
Definition: multixact.c:2210
Size MultiXactShmemSize(void)
Definition: multixact.c:1709
#define MultiXactOffsetCtl
Definition: multixact.c:119
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:1948
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, MultiXactMember *members)
Definition: multixact.c:759
int multixactmemberssyncfiletag(const FileTag *ftag, char *path)
Definition: multixact.c:2995
#define MAX_CACHE_ENTRIES
Definition: multixact.c:246
static MultiXactId NextMultiXactId(MultiXactId multi)
Definition: multixact.c:102
MultiXactId GetOldestMultiXactId(void)
Definition: multixact.c:2305
void CheckPointMultiXact(void)
Definition: multixact.c:1970
#define MaxOldestSlot
Definition: multixact.c:213
MultiXactId MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
Definition: multixact.c:658
struct mXactCacheEnt mXactCacheEnt
static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members)
Definition: multixact.c:1381
static dclist_head MXactCache
Definition: multixact.c:247
void TrimMultiXact(void)
Definition: multixact.c:1836
#define debug_elog3(a, b, c)
Definition: multixact.c:258
char * mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
Definition: multixact.c:1539
#define debug_elog4(a, b, c, d)
Definition: multixact.c:259
void multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition: multixact.c:1697
static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
Definition: multixact.c:2800
int MultiXactMemberFreezeThreshold(void)
Definition: multixact.c:2513
static void SetOldestOffset(void)
Definition: multixact.c:2340
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2166
static MultiXactId * OldestMemberMXactId
Definition: multixact.c:217
static MultiXactStateData * MultiXactState
Definition: multixact.c:216
MultiXactId ReadNextMultiXactId(void)
Definition: multixact.c:622
void BootStrapMultiXact(void)
Definition: multixact.c:1795
#define debug_elog6(a, b, c, d, e, f)
Definition: multixact.c:261
void multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len)
Definition: multixact.c:1661
MultiXactId MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1, TransactionId xid2, MultiXactStatus status2)
Definition: multixact.c:301
void TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
Definition: multixact.c:2624
bool check_multixact_offset_buffers(int *newval, void **extra, GucSource source)
Definition: multixact.c:1775
bool check_multixact_member_buffers(int *newval, void **extra, GucSource source)
Definition: multixact.c:1784
void AtEOXact_MultiXact(void)
Definition: multixact.c:1570
#define MULTIXACT_MEMBER_HIGH_THRESHOLD
Definition: multixact.c:99
static SlruCtlData MultiXactMemberCtlData
Definition: multixact.c:117
#define debug_elog2(a, b)
Definition: multixact.c:257
void StartupMultiXact(void)
Definition: multixact.c:1811
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
Definition: multixact.c:2016
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1115
#define MultiXactIdIsValid(multi)
Definition: multixact.h:29
#define XLOG_MULTIXACT_ZERO_MEM_PAGE
Definition: multixact.h:68
#define XLOG_MULTIXACT_ZERO_OFF_PAGE
Definition: multixact.h:67
#define FirstMultiXactId
Definition: multixact.h:26
MultiXactStatus
Definition: multixact.h:37
@ MultiXactStatusForShare
Definition: multixact.h:39
@ MultiXactStatusForNoKeyUpdate
Definition: multixact.h:40
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:43
@ MultiXactStatusUpdate
Definition: multixact.h:45
@ MultiXactStatusForUpdate
Definition: multixact.h:41
@ MultiXactStatusForKeyShare
Definition: multixact.h:38
#define ISUPDATE_from_mxstatus(status)
Definition: multixact.h:51
#define InvalidMultiXactId
Definition: multixact.h:25
#define XLOG_MULTIXACT_TRUNCATE_ID
Definition: multixact.h:70
#define SizeOfMultiXactCreate
Definition: multixact.h:80
#define SizeOfMultiXactTruncate
Definition: multixact.h:95
#define XLOG_MULTIXACT_CREATE_ID
Definition: multixact.h:69
#define MaxMultiXactId
Definition: multixact.h:27
struct MultiXactMember MultiXactMember
static int64 MultiXactIdToOffsetSegment(MultiXactId multi)
static int64 MXOffsetToMemberSegment(MultiXactOffset offset)
#define MXACT_MEMBER_BITS_PER_XACT
static int MXOffsetToFlagsBitShift(MultiXactOffset32 offset)
#define MXACT_MEMBER_XACT_BITMASK
static int64 MXOffsetToMemberPage(MultiXactOffset32 offset)
#define MULTIXACT_OFFSETS_PER_PAGE
static int MXOffsetToMemberOffset(MultiXactOffset32 offset)
static int MultiXactIdToOffsetEntry(MultiXactId multi)
static int64 MultiXactIdToOffsetPage(MultiXactId multi)
#define MULTIXACT_MEMBERS_PER_PAGE
static int MXOffsetToFlagsOffset(MultiXactOffset32 offset)
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:42
const void size_t len
const void * data
static char * filename
Definition: pg_dumpall.c:120
static rewind_source * source
Definition: pg_rewind.c:89
static char buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
void SendPostmasterSignal(PMSignalReason reason)
Definition: pmsignal.c:165
@ PMSIGNAL_START_AUTOVAC_LAUNCHER
Definition: pmsignal.h:39
#define qsort(a, b, c, d)
Definition: port.h:499
unsigned int Oid
Definition: postgres_ext.h:32
#define DELAY_CHKPT_START
Definition: proc.h:135
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1404
int ProcNumber
Definition: procnumber.h:24
tree ctl
Definition: radixtree.h:1838
Size add_size(Size s1, Size s2)
Definition: shmem.c:495
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:389
void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, const char *subdir, int buffer_tranche_id, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names)
Definition: slru.c:252
int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid)
Definition: slru.c:630
void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
Definition: slru.c:1347
bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno)
Definition: slru.c:771
bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
Definition: slru.c:1816
int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid)
Definition: slru.c:527
int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
Definition: slru.c:1856
int SimpleLruZeroPage(SlruCtl ctl, int64 pageno)
Definition: slru.c:375
void SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno)
Definition: slru.c:444
void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage)
Definition: slru.c:1433
Size SimpleLruShmemSize(int nslots, int nlsns)
Definition: slru.c:198
bool check_slru_buffers(const char *name, int *newval)
Definition: slru.c:355
static LWLock * SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
Definition: slru.h:160
#define SlruPagePrecedesUnitTests(ctl, per_page)
Definition: slru.h:185
PGPROC * MyProc
Definition: proc.c:67
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
Definition: sync.h:51
Definition: lwlock.h:42
TransactionId xid
Definition: multixact.h:57
MultiXactStatus status
Definition: multixact.h:58
MultiXactId multiWrapLimit
Definition: multixact.c:158
MultiXactId multiStopLimit
Definition: multixact.c:157
MultiXactId multiWarnLimit
Definition: multixact.c:156
MultiXactId multiVacLimit
Definition: multixact.c:155
MultiXactOffset nextOffset
Definition: multixact.c:135
MultiXactId nextMXact
Definition: multixact.c:132
MultiXactId oldestMultiXactId
Definition: multixact.c:145
MultiXactId perBackendXactIds[FLEXIBLE_ARRAY_MEMBER]
Definition: multixact.c:207
MultiXactOffset oldestOffset
Definition: multixact.c:152
int delayChkptFlags
Definition: proc.h:257
dlist_node * cur
Definition: ilist.h:179
MultiXactId multi
Definition: multixact.c:240
dlist_node node
Definition: multixact.c:242
MultiXactMember members[FLEXIBLE_ARRAY_MEMBER]
Definition: multixact.c:243
int64 earliestExistingPage
Definition: multixact.c:2563
MultiXactId mid
Definition: multixact.h:74
MultiXactMember members[FLEXIBLE_ARRAY_MEMBER]
Definition: multixact.h:77
MultiXactOffset moff
Definition: multixact.h:75
MultiXactId endTruncOff
Definition: multixact.h:88
MultiXactOffset startTruncMemb
Definition: multixact.h:91
MultiXactOffset endTruncMemb
Definition: multixact.h:92
MultiXactId startTruncOff
Definition: multixact.h:87
@ SYNC_HANDLER_MULTIXACT_MEMBER
Definition: sync.h:41
@ SYNC_HANDLER_MULTIXACT_OFFSET
Definition: sync.h:40
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.h:263
ProcNumber TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held)
Definition: twophase.c:908
void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, const void *data, uint32 len)
Definition: twophase.c:1271
#define TWOPHASE_RM_MULTIXACT_ID
Definition: twophase_rmgr.h:29
void AdvanceNextFullTransactionIdPastXid(TransactionId xid)
Definition: varsup.c:304
bool IsTransactionState(void)
Definition: xact.c:388
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:942
bool RecoveryInProgress(void)
Definition: xlog.c:6461
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2784
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value)
Definition: xloginsert.c:543
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:368
void XLogBeginInsert(void)
Definition: xloginsert.c:152
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:409
#define XLogRecGetData(decoder)
Definition: xlogreader.h:414
#define XLogRecGetXid(decoder)
Definition: xlogreader.h:411
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:416
bool InRecovery
Definition: xlogutils.c:50