diff options
| -rw-r--r-- | doc/src/sgml/client-auth.sgml | 12 | ||||
| -rw-r--r-- | doc/src/sgml/config.sgml | 29 | ||||
| -rw-r--r-- | doc/src/sgml/high-availability.sgml | 12 | ||||
| -rw-r--r-- | doc/src/sgml/logical-replication.sgml | 6 | ||||
| -rw-r--r-- | doc/src/sgml/logicaldecoding.sgml | 6 | ||||
| -rw-r--r-- | src/backend/access/brin/brin.c | 8 | ||||
| -rw-r--r-- | src/backend/access/gin/gininsert.c | 4 | ||||
| -rw-r--r-- | src/backend/access/heap/heapam_visibility.c | 293 | ||||
| -rw-r--r-- | src/backend/access/nbtree/nbtree.c | 10 | ||||
| -rw-r--r-- | src/backend/parser/parse_utilcmd.c | 2 | ||||
| -rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 36 | ||||
| -rw-r--r-- | src/backend/storage/lmgr/lwlock.c | 8 | ||||
| -rw-r--r-- | src/backend/utils/mmgr/mcxt.c | 58 | ||||
| -rw-r--r-- | src/backend/utils/time/snapmgr.c | 5 | ||||
| -rw-r--r-- | src/include/utils/snapmgr.h | 3 | ||||
| -rw-r--r-- | src/test/modules/test_cloexec/Makefile | 17 | ||||
| -rw-r--r-- | src/test/modules/test_cloexec/test_cloexec.c | 60 |
17 files changed, 204 insertions, 365 deletions
diff --git a/doc/src/sgml/client-auth.sgml b/doc/src/sgml/client-auth.sgml index eb795bb0f21..a347ee18980 100644 --- a/doc/src/sgml/client-auth.sgml +++ b/doc/src/sgml/client-auth.sgml @@ -889,16 +889,16 @@ host all all 192.168.0.0/16 ident map=omicro # list of names of administrators. Passwords are required in all cases. # # TYPE DATABASE USER ADDRESS METHOD -local sameuser all md5 -local all /^.*helpdesk$ md5 -local all @admins md5 -local all +support md5 +local sameuser all scram-sha-256 +local all /^.*helpdesk$ scram-sha-256 +local all @admins scram-sha-256 +local all +support scram-sha-256 # The last two lines above can be combined into a single line: -local all @admins,+support md5 +local all @admins,+support scram-sha-256 # The database column can also use lists and file names: -local db1,db2,@demodbs all md5 +local db1,db2,@demodbs all scram-sha-256 </programlisting> </example> </sect1> diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 405c9689bd0..1c23538d3c5 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2075,7 +2075,7 @@ include_dir 'conf.d' <para> Specifies the maximum amount of memory to be used by logical decoding, before some of the decoded changes are written to local disk. This - limits the amount of memory used by logical streaming replication + limits the amount of memory used by streaming logical replication connections. It defaults to 64 megabytes (<literal>64MB</literal>). Since each replication connection only uses a single buffer of this size, and an installation normally doesn't have many such connections @@ -3800,7 +3800,7 @@ include_dir 'conf.d' difference between the two modes, but when set to <literal>always</literal> the WAL archiver is enabled also during archive recovery or standby mode. In <literal>always</literal> mode, all files restored from the archive - or streamed with streaming replication will be archived (again). 
See + or streamed with streaming physical replication will be archived (again). See <xref linkend="continuous-archiving-in-standby"/> for details. </para> <para> @@ -3906,7 +3906,7 @@ include_dir 'conf.d' full files. Therefore, it is unwise to use a very short <varname>archive_timeout</varname> — it will bloat your archive storage. <varname>archive_timeout</varname> settings of a minute or so are - usually reasonable. You should consider using streaming replication, + usually reasonable. You should consider using streaming physical replication, instead of archiving, if you want data to be copied off the primary server more quickly than that. If this value is specified without units, it is taken as seconds. @@ -3931,7 +3931,7 @@ include_dir 'conf.d' <para> This section describes the settings that apply to recovery in general, - affecting crash recovery, streaming replication and archive-based + affecting crash recovery, streaming physical replication and archive-based replication. </para> @@ -4042,7 +4042,7 @@ include_dir 'conf.d' <para> The local shell command to execute to retrieve an archived segment of the WAL file series. This parameter is required for archive recovery, - but optional for streaming replication. + but optional for streaming physical replication. 
Any <literal>%f</literal> in the string is replaced by the name of the file to retrieve from the archive, and any <literal>%p</literal> is replaced by the copy destination path name @@ -4468,15 +4468,16 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows <title>Replication</title> <para> - These settings control the behavior of the built-in - <firstterm>streaming replication</firstterm> feature (see - <xref linkend="streaming-replication"/>), and the built-in - <firstterm>logical replication</firstterm> feature (see + These settings control the behavior of + <firstterm>streaming replication</firstterm>, + both <firstterm>physical replication</firstterm> + (see <xref linkend="streaming-replication"/>) and + <firstterm>logical replication</firstterm> (see <xref linkend="logical-replication"/>). </para> <para> - For <emphasis>streaming replication</emphasis>, servers will be either a + For <emphasis>physical replication</emphasis>, servers will be either a primary or a standby server. Primaries can send data, while standbys are always receivers of replicated data. When cascading replication (see <xref linkend="cascading-replication"/>) is used, standby servers @@ -4907,7 +4908,7 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class=" These settings control the behavior of a <link linkend="standby-server-operation">standby server</link> that is - to receive replication data. Their values on the primary server + to receive physical replication data. Their values on the primary server are irrelevant. </para> @@ -5047,7 +5048,7 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class=" conflict with about-to-be-applied WAL entries, as described in <xref linkend="hot-standby-conflict"/>. <varname>max_standby_streaming_delay</varname> applies when WAL data is - being received via streaming replication. + being received via streaming physical replication. 
If this value is specified without units, it is taken as milliseconds. The default is 30 seconds. A value of -1 allows the standby to wait forever for conflicting @@ -5183,7 +5184,7 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class=" <listitem> <para> Specifies how long the standby server should wait when WAL data is not - available from any sources (streaming replication, + available from any sources (streaming physical replication, local <filename>pg_wal</filename> or WAL archive) before trying again to retrieve WAL data. If this value is specified without units, it is taken as milliseconds. @@ -5260,7 +5261,7 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class=" <filename>pg_wal</filename> directory. </para> <para> - This parameter is intended for use with streaming replication deployments; + This parameter is intended for use with streaming physical replication deployments; however, if the parameter is specified it will be honored in all cases except crash recovery. diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index 81eeadd6c47..33ca3f0286c 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -151,7 +151,7 @@ protocol to make nodes agree on a serializable transactional order. </para> <para> A standby server can be implemented using file-based log shipping - (<xref linkend="warm-standby"/>) or streaming replication (see + (<xref linkend="warm-standby"/>) or streaming physical replication (see <xref linkend="streaming-replication"/>), or a combination of both. For information on hot standby, see <xref linkend="hot-standby"/>. </para> @@ -628,7 +628,7 @@ protocol to make nodes agree on a serializable transactional order. In standby mode, the server continuously applies WAL received from the primary server. 
The standby server can read WAL from a WAL archive (see <xref linkend="guc-restore-command"/>) or directly from the primary - over a TCP connection (streaming replication). The standby server will + over a TCP connection (streaming physical replication). The standby server will also attempt to restore any WAL found in the standby cluster's <filename>pg_wal</filename> directory. That typically happens after a server restart, when the standby replays again WAL that was streamed from the @@ -772,6 +772,14 @@ archive_cleanup_command = 'pg_archivecleanup /path/to/archive "%r"' generated, without waiting for the WAL file to be filled. </para> + <note> + <para> + This discussion of streaming replication assumes physical replication. + Although you could treat a logical replication subscriber as a warm standby, + doing so would differ in several respects from what is described here. + </para> + </note> + <para> Streaming replication is asynchronous by default (see <xref linkend="synchronous-replication"/>), in which case there is diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index aa013f348d4..b3faaa675ef 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -6,7 +6,7 @@ <para> Logical replication is a method of replicating data objects and their changes, based upon their replication identity (usually a primary key). We - use the term logical in contrast to physical replication, which uses exact + use the term logical replication in contrast to physical replication, which uses exact block addresses and byte-by-byte replication. PostgreSQL supports both mechanisms concurrently, see <xref linkend="high-availability"/>. 
Logical replication allows fine-grained control over both data replication and @@ -2496,8 +2496,8 @@ CONTEXT: processing remote data for replication origin "pg_16395" during "INSER <title>Monitoring</title> <para> - Because logical replication is based on a similar architecture as - <link linkend="streaming-replication">physical streaming replication</link>, + Because streaming logical replication is based on an architecture similar to that of + <link linkend="streaming-replication">streaming physical replication</link>, the monitoring on a publication node is similar to monitoring of a physical replication primary (see <xref linkend="streaming-replication-monitoring"/>). diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml index cae8a376c3b..6368e46ce93 100644 --- a/doc/src/sgml/logicaldecoding.sgml +++ b/doc/src/sgml/logicaldecoding.sgml @@ -275,9 +275,9 @@ postgres=# SELECT * from pg_logical_slot_get_changes('regression_slot', NULL, NU </para> <note> - <para><productname>PostgreSQL</productname> also has streaming replication slots - (see <xref linkend="streaming-replication"/>), but they are used somewhat - differently there. + <para><productname>PostgreSQL</productname> can also use streaming replication slots + to maintain a standby server (see <xref linkend="streaming-replication"/>), but + typically those use physical replication, not logical. 
</para> </note> diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 26cb75058d1..45d306037a4 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -1478,8 +1478,8 @@ brin_summarize_range(PG_FUNCTION_ARGS) /* Restore userid and security context */ SetUserIdAndSecContext(save_userid, save_sec_context); - relation_close(indexRel, ShareUpdateExclusiveLock); - relation_close(heapRel, ShareUpdateExclusiveLock); + index_close(indexRel, ShareUpdateExclusiveLock); + table_close(heapRel, ShareUpdateExclusiveLock); PG_RETURN_INT32((int32) numSummarized); } @@ -1568,8 +1568,8 @@ brin_desummarize_range(PG_FUNCTION_ARGS) errmsg("index \"%s\" is not valid", RelationGetRelationName(indexRel)))); - relation_close(indexRel, ShareUpdateExclusiveLock); - relation_close(heapRel, ShareUpdateExclusiveLock); + index_close(indexRel, ShareUpdateExclusiveLock); + table_close(heapRel, ShareUpdateExclusiveLock); PG_RETURN_VOID(); } diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index df30dcc0228..88246071c4b 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -1784,7 +1784,7 @@ _gin_parallel_merge(GinBuildState *state) ++numtuples); } - /* relase all the memory */ + /* release all the memory */ GinBufferFree(buffer); tuplesort_end(state->bs_sortstate); @@ -1972,7 +1972,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, GinBufferReset(buffer); } - /* relase all the memory */ + /* release all the memory */ GinBufferFree(buffer); tuplesort_end(worker_sort); diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index 05f6946fe60..bf899c2d2c6 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -144,6 +144,55 @@ HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, SetHintBits(tuple, buffer, infomask, xid); } +/* + * If 
HEAP_MOVED_OFF or HEAP_MOVED_IN are set on the tuple, remove them and + * adjust hint bits. See the comment for SetHintBits() for more background. + * + * This helper returns false if the row ought to be invisible, true otherwise. + */ +static inline bool +HeapTupleCleanMoved(HeapTupleHeader tuple, Buffer buffer) +{ + TransactionId xvac; + + /* only used by pre-9.0 binary upgrades */ + if (likely(!(tuple->t_infomask & (HEAP_MOVED_OFF | HEAP_MOVED_IN)))) + return true; + + xvac = HeapTupleHeaderGetXvac(tuple); + + if (TransactionIdIsCurrentTransactionId(xvac)) + elog(ERROR, "encountered tuple with HEAP_MOVED considered current"); + + if (TransactionIdIsInProgress(xvac)) + elog(ERROR, "encountered tuple with HEAP_MOVED considered in-progress"); + + if (tuple->t_infomask & HEAP_MOVED_OFF) + { + if (TransactionIdDidCommit(xvac)) + { + SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, + InvalidTransactionId); + return false; + } + SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, + InvalidTransactionId); + } + else if (tuple->t_infomask & HEAP_MOVED_IN) + { + if (TransactionIdDidCommit(xvac)) + SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, + InvalidTransactionId); + else + { + SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, + InvalidTransactionId); + return false; + } + } + + return true; +} /* * HeapTupleSatisfiesSelf @@ -179,45 +228,8 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) if (HeapTupleHeaderXminInvalid(tuple)) return false; - /* Used by pre-9.0 binary upgrades */ - if (tuple->t_infomask & HEAP_MOVED_OFF) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (TransactionIdIsCurrentTransactionId(xvac)) - return false; - if (!TransactionIdIsInProgress(xvac)) - { - if (TransactionIdDidCommit(xvac)) - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; - } - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - } - } - /* Used by pre-9.0 binary upgrades */ - else if 
(tuple->t_infomask & HEAP_MOVED_IN) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (!TransactionIdIsCurrentTransactionId(xvac)) - { - if (TransactionIdIsInProgress(xvac)) - return false; - if (TransactionIdDidCommit(xvac)) - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - else - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; - } - } - } + if (!HeapTupleCleanMoved(tuple, buffer)) + return false; else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -372,45 +384,8 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, if (HeapTupleHeaderXminInvalid(tuple)) return false; - /* Used by pre-9.0 binary upgrades */ - if (tuple->t_infomask & HEAP_MOVED_OFF) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (TransactionIdIsCurrentTransactionId(xvac)) - return false; - if (!TransactionIdIsInProgress(xvac)) - { - if (TransactionIdDidCommit(xvac)) - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; - } - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - } - } - /* Used by pre-9.0 binary upgrades */ - else if (tuple->t_infomask & HEAP_MOVED_IN) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (!TransactionIdIsCurrentTransactionId(xvac)) - { - if (TransactionIdIsInProgress(xvac)) - return false; - if (TransactionIdDidCommit(xvac)) - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - else - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; - } - } - } + if (!HeapTupleCleanMoved(tuple, buffer)) + return false; /* * An invalid Xmin can be left behind by a speculative insertion that @@ -468,45 +443,8 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, if (HeapTupleHeaderXminInvalid(tuple)) return TM_Invisible; - /* Used by pre-9.0 
binary upgrades */ - if (tuple->t_infomask & HEAP_MOVED_OFF) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (TransactionIdIsCurrentTransactionId(xvac)) - return TM_Invisible; - if (!TransactionIdIsInProgress(xvac)) - { - if (TransactionIdDidCommit(xvac)) - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return TM_Invisible; - } - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - } - } - /* Used by pre-9.0 binary upgrades */ - else if (tuple->t_infomask & HEAP_MOVED_IN) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (!TransactionIdIsCurrentTransactionId(xvac)) - { - if (TransactionIdIsInProgress(xvac)) - return TM_Invisible; - if (TransactionIdDidCommit(xvac)) - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - else - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return TM_Invisible; - } - } - } + else if (!HeapTupleCleanMoved(tuple, buffer)) + return TM_Invisible; else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (HeapTupleHeaderGetCmin(tuple) >= curcid) @@ -756,45 +694,8 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, if (HeapTupleHeaderXminInvalid(tuple)) return false; - /* Used by pre-9.0 binary upgrades */ - if (tuple->t_infomask & HEAP_MOVED_OFF) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (TransactionIdIsCurrentTransactionId(xvac)) - return false; - if (!TransactionIdIsInProgress(xvac)) - { - if (TransactionIdDidCommit(xvac)) - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; - } - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - } - } - /* Used by pre-9.0 binary upgrades */ - else if (tuple->t_infomask & HEAP_MOVED_IN) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (!TransactionIdIsCurrentTransactionId(xvac)) - { - if (TransactionIdIsInProgress(xvac)) - return 
false; - if (TransactionIdDidCommit(xvac)) - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - else - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; - } - } - } + if (!HeapTupleCleanMoved(tuple, buffer)) + return false; else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -979,45 +880,8 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, if (HeapTupleHeaderXminInvalid(tuple)) return false; - /* Used by pre-9.0 binary upgrades */ - if (tuple->t_infomask & HEAP_MOVED_OFF) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (TransactionIdIsCurrentTransactionId(xvac)) - return false; - if (!XidInMVCCSnapshot(xvac, snapshot)) - { - if (TransactionIdDidCommit(xvac)) - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; - } - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - } - } - /* Used by pre-9.0 binary upgrades */ - else if (tuple->t_infomask & HEAP_MOVED_IN) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (!TransactionIdIsCurrentTransactionId(xvac)) - { - if (XidInMVCCSnapshot(xvac, snapshot)) - return false; - if (TransactionIdDidCommit(xvac)) - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - else - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return false; - } - } - } + if (!HeapTupleCleanMoved(tuple, buffer)) + return false; else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid) @@ -1222,43 +1086,8 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de { if (HeapTupleHeaderXminInvalid(tuple)) return HEAPTUPLE_DEAD; - /* Used by pre-9.0 binary upgrades */ - else if (tuple->t_infomask & HEAP_MOVED_OFF) - { - TransactionId xvac = 
HeapTupleHeaderGetXvac(tuple); - - if (TransactionIdIsCurrentTransactionId(xvac)) - return HEAPTUPLE_DELETE_IN_PROGRESS; - if (TransactionIdIsInProgress(xvac)) - return HEAPTUPLE_DELETE_IN_PROGRESS; - if (TransactionIdDidCommit(xvac)) - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return HEAPTUPLE_DEAD; - } - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - } - /* Used by pre-9.0 binary upgrades */ - else if (tuple->t_infomask & HEAP_MOVED_IN) - { - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); - - if (TransactionIdIsCurrentTransactionId(xvac)) - return HEAPTUPLE_INSERT_IN_PROGRESS; - if (TransactionIdIsInProgress(xvac)) - return HEAPTUPLE_INSERT_IN_PROGRESS; - if (TransactionIdDidCommit(xvac)) - SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - InvalidTransactionId); - else - { - SetHintBits(tuple, buffer, HEAP_XMIN_INVALID, - InvalidTransactionId); - return HEAPTUPLE_DEAD; - } - } + else if (!HeapTupleCleanMoved(tuple, buffer)) + return HEAPTUPLE_DEAD; else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 6197b725fb1..b4425231935 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -437,16 +437,6 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, * not already done in a previous rescan call. To save on palloc * overhead, both workspaces are allocated as one palloc block; only this * function and btendscan know that. - * - * NOTE: this data structure also makes it safe to return data from a - * "name" column, even though btree name_ops uses an underlying storage - * datatype of cstring. The risk there is that "name" is supposed to be - * padded to NAMEDATALEN, but the actual index tuple is probably shorter. 
- * However, since we only return data out of tuples sitting in the - * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some - * data out of the markTuples array --- running off the end of memory for - * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats - * adding special-case treatment for name_ops elsewhere. */ if (scan->xs_want_itup && so->currTuples == NULL) { diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 375b40b29af..2b7b084f216 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -2572,7 +2572,7 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) } /* Close the index relation but keep the lock */ - relation_close(index_rel, NoLock); + index_close(index_rel, NoLock); index->indexOid = index_oid; } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index a768fb129ae..eb55102b0d7 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -6358,23 +6358,41 @@ rlocator_comparator(const void *p1, const void *p2) uint32 LockBufHdr(BufferDesc *desc) { - SpinDelayStatus delayStatus; uint32 old_buf_state; Assert(!BufferIsLocal(BufferDescriptorGetBuffer(desc))); - init_local_spin_delay(&delayStatus); - while (true) { - /* set BM_LOCKED flag */ + /* + * Always try once to acquire the lock directly, without setting up + * the spin-delay infrastructure. The work necessary for that shows up + * in profiles and is rarely necessary. 
+ */ old_buf_state = pg_atomic_fetch_or_u32(&desc->state, BM_LOCKED); - /* if it wasn't set before we're OK */ - if (!(old_buf_state & BM_LOCKED)) - break; - perform_spin_delay(&delayStatus); + if (likely(!(old_buf_state & BM_LOCKED))) + break; /* got lock */ + + /* and then spin without atomic operations until lock is released */ + { + SpinDelayStatus delayStatus; + + init_local_spin_delay(&delayStatus); + + while (old_buf_state & BM_LOCKED) + { + perform_spin_delay(&delayStatus); + old_buf_state = pg_atomic_read_u32(&desc->state); + } + finish_spin_delay(&delayStatus); + } + + /* + * Retry. The lock might obviously already be re-acquired by the time + * we're attempting to get it again. + */ } - finish_spin_delay(&delayStatus); + return old_buf_state | BM_LOCKED; } diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 255cfa8fa95..b839ace57cb 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -870,9 +870,13 @@ LWLockWaitListLock(LWLock *lock) while (true) { - /* always try once to acquire lock directly */ + /* + * Always try once to acquire the lock directly, without setting up + * the spin-delay infrastructure. The work necessary for that shows up + * in profiles and is rarely necessary. + */ old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED); - if (!(old_state & LW_FLAG_LOCKED)) + if (likely(!(old_state & LW_FLAG_LOCKED))) break; /* got lock */ /* and then spin without atomic operations until lock is released */ diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 47fd774c7d2..5c1a06d86fd 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -174,6 +174,9 @@ MemoryContext CurTransactionContext = NULL; /* This is a transient link to the active portal's memory context: */ MemoryContext PortalContext = NULL; +/* Is memory context logging currently in progress? 
*/ +static bool LogMemoryContextInProgress = false; + static void MemoryContextDeleteOnly(MemoryContext context); static void MemoryContextCallResetCallbacks(MemoryContext context); static void MemoryContextStatsInternal(MemoryContext context, int level, @@ -1339,26 +1342,45 @@ ProcessLogMemoryContextInterrupt(void) LogMemoryContextPending = false; /* - * Use LOG_SERVER_ONLY to prevent this message from being sent to the - * connected client. + * Exit immediately if memory context logging is already in progress. This + * prevents recursive calls, which could occur if logging is requested + * repeatedly and rapidly, potentially leading to infinite recursion and a + * crash. */ - ereport(LOG_SERVER_ONLY, - (errhidestmt(true), - errhidecontext(true), - errmsg("logging memory contexts of PID %d", MyProcPid))); + if (LogMemoryContextInProgress) + return; + LogMemoryContextInProgress = true; - /* - * When a backend process is consuming huge memory, logging all its memory - * contexts might overrun available disk space. To prevent this, we limit - * the depth of the hierarchy, as well as the number of child contexts to - * log per parent to 100. - * - * As with MemoryContextStats(), we suppose that practical cases where the - * dump gets long will typically be huge numbers of siblings under the - * same parent context; while the additional debugging value from seeing - * details about individual siblings beyond 100 will not be large. - */ - MemoryContextStatsDetail(TopMemoryContext, 100, 100, false); + PG_TRY(); + { + /* + * Use LOG_SERVER_ONLY to prevent this message from being sent to the + * connected client. + */ + ereport(LOG_SERVER_ONLY, + (errhidestmt(true), + errhidecontext(true), + errmsg("logging memory contexts of PID %d", MyProcPid))); + + /* + * When a backend process is consuming huge memory, logging all its + * memory contexts might overrun available disk space. 
To prevent + * this, we limit the depth of the hierarchy, as well as the number of + * child contexts to log per parent to 100. + * + * As with MemoryContextStats(), we suppose that practical cases where + * the dump gets long will typically be huge numbers of siblings under + * the same parent context; while the additional debugging value from + * seeing details about individual siblings beyond 100 will not be + * large. + */ + MemoryContextStatsDetail(TopMemoryContext, 100, 100, false); + } + PG_FINALLY(); + { + LogMemoryContextInProgress = false; + } + PG_END_TRY(); } void * diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 40a2e90e071..5af8326d5e8 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -1848,12 +1848,9 @@ RestoreSnapshot(char *start_address) /* * Install a restored snapshot as the transaction snapshot. - * - * The second argument is of type void * so that snapmgr.h need not include - * the declaration for PGPROC. 
*/ void -RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc) +RestoreTransactionSnapshot(Snapshot snapshot, PGPROC *source_pgproc) { SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc); } diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index 604c1f90216..b663d3bbc8c 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -120,6 +120,7 @@ extern bool HistoricSnapshotActive(void); extern Size EstimateSnapshotSpace(Snapshot snapshot); extern void SerializeSnapshot(Snapshot snapshot, char *start_address); extern Snapshot RestoreSnapshot(char *start_address); -extern void RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc); +struct PGPROC; +extern void RestoreTransactionSnapshot(Snapshot snapshot, struct PGPROC *source_pgproc); #endif /* SNAPMGR_H */ diff --git a/src/test/modules/test_cloexec/Makefile b/src/test/modules/test_cloexec/Makefile index 70d38575e26..cd16a59add5 100644 --- a/src/test/modules/test_cloexec/Makefile +++ b/src/test/modules/test_cloexec/Makefile @@ -1,23 +1,14 @@ # src/test/modules/test_cloexec/Makefile -# This module is for Windows only -ifeq ($(PORTNAME),win32) - -MODULE_big = test_cloexec -OBJS = \ - $(WIN32RES) \ - test_cloexec.o - PGFILEDESC = "test_cloexec - test O_CLOEXEC flag handling" +PGAPPICON = win32 -# Build as a standalone program, not a shared library -PROGRAM = test_cloexec -override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) +PROGRAM += test_cloexec +OBJS += $(WIN32RES) test_cloexec.o +NO_INSTALLCHECK = 1 TAP_TESTS = 1 -endif - ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/src/test/modules/test_cloexec/test_cloexec.c b/src/test/modules/test_cloexec/test_cloexec.c index 9f645451684..aec8af9937d 100644 --- a/src/test/modules/test_cloexec/test_cloexec.c +++ b/src/test/modules/test_cloexec/test_cloexec.c @@ -24,21 +24,22 @@ #include "common/file_utils.h" #include "port.h" +#ifdef WIN32 static void 
run_parent_tests(const char *testfile1, const char *testfile2); static void run_child_tests(const char *handle1_str, const char *handle2_str); static bool try_write_to_handle(HANDLE h, const char *label); +#endif int main(int argc, char *argv[]) { - char testfile1[MAXPGPATH]; - char testfile2[MAXPGPATH]; - /* Windows-only test */ #ifndef WIN32 fprintf(stderr, "This test only runs on Windows\n"); return 0; -#endif +#else + char testfile1[MAXPGPATH]; + char testfile2[MAXPGPATH]; if (argc == 3) { @@ -68,26 +69,26 @@ main(int argc, char *argv[]) fprintf(stderr, "Usage: %s [handle1_hex handle2_hex]\n", argv[0]); return 1; } +#endif } +#ifdef WIN32 static void run_parent_tests(const char *testfile1, const char *testfile2) { -#ifdef WIN32 int fd1, fd2; HANDLE h1, h2; - char cmdline[1024]; - STARTUPINFO si; - PROCESS_INFORMATION pi; + char exe_path[MAXPGPATH]; + char cmdline[MAXPGPATH + 100]; + STARTUPINFO si = {.cb = sizeof(si)}; + PROCESS_INFORMATION pi = {0}; DWORD exit_code; printf("Parent: Opening test files...\n"); - /* - * Open first file WITH O_CLOEXEC - should NOT be inherited - */ + /* Open first file WITH O_CLOEXEC - should NOT be inherited */ fd1 = open(testfile1, O_RDWR | O_CREAT | O_TRUNC | O_CLOEXEC, 0600); if (fd1 < 0) { @@ -95,9 +96,7 @@ run_parent_tests(const char *testfile1, const char *testfile2) exit(1); } - /* - * Open second file WITHOUT O_CLOEXEC - should be inherited - */ + /* Open second file WITHOUT O_CLOEXEC - should be inherited */ fd2 = open(testfile2, O_RDWR | O_CREAT | O_TRUNC, 0600); if (fd2 < 0) { @@ -122,28 +121,11 @@ run_parent_tests(const char *testfile1, const char *testfile2) printf("Parent: fd2=%d (no O_CLOEXEC) -> HANDLE=%p\n", fd2, h2); /* - * Spawn child process with bInheritHandles=TRUE, passing handle values as - * hex strings - */ - snprintf(cmdline, sizeof(cmdline), "\"%s\" %p %p", - GetCommandLine(), h1, h2); - - /* * Find the actual executable path by removing any arguments from - * GetCommandLine(). 
+ * GetCommandLine(), and add our new arguments. */ - { - char exe_path[MAX_PATH]; - char *space_pos; - - GetModuleFileName(NULL, exe_path, sizeof(exe_path)); - snprintf(cmdline, sizeof(cmdline), "\"%s\" %p %p", - exe_path, h1, h2); - } - - memset(&si, 0, sizeof(si)); - si.cb = sizeof(si); - memset(&pi, 0, sizeof(pi)); + GetModuleFileName(NULL, exe_path, sizeof(exe_path)); + snprintf(cmdline, sizeof(cmdline), "\"%s\" %p %p", exe_path, h1, h2); printf("Parent: Spawning child process...\n"); printf("Parent: Command line: %s\n", cmdline); @@ -180,19 +162,19 @@ run_parent_tests(const char *testfile1, const char *testfile2) printf("Parent: Child exit code: %lu\n", exit_code); if (exit_code == 0) + { printf("Parent: SUCCESS - O_CLOEXEC behavior verified\n"); + } else { printf("Parent: FAILURE - O_CLOEXEC not working correctly\n"); exit(1); } -#endif } static void run_child_tests(const char *handle1_str, const char *handle2_str) { -#ifdef WIN32 HANDLE h1, h2; bool h1_worked, @@ -232,13 +214,11 @@ run_child_tests(const char *handle1_str, const char *handle2_str) printf("Child: Test FAILED - O_CLOEXEC not working correctly\n"); exit(1); } -#endif } static bool try_write_to_handle(HANDLE h, const char *label) { -#ifdef WIN32 const char *test_data = "test\n"; DWORD bytes_written; BOOL result; @@ -256,7 +236,5 @@ try_write_to_handle(HANDLE h, const char *label) label, GetLastError()); return false; } -#else - return false; -#endif } +#endif |
