diff options
Diffstat (limited to 'src/include')
25 files changed, 513 insertions, 110 deletions
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 52916bab7a3..70949de56ac 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -293,7 +293,7 @@ typedef struct IndexAmRoutine ambuild_function ambuild; ambuildempty_function ambuildempty; aminsert_function aminsert; - aminsertcleanup_function aminsertcleanup; + aminsertcleanup_function aminsertcleanup; /* can be NULL */ ambulkdelete_function ambulkdelete; amvacuumcleanup_function amvacuumcleanup; amcanreturn_function amcanreturn; /* can be NULL */ diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 2cf8d55d706..cc06fc29ab2 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -316,16 +316,6 @@ typedef struct XLogRecData uint32 len; /* length of rmgr data to include */ } XLogRecData; -/* - * Recovery target action. - */ -typedef enum -{ - RECOVERY_TARGET_ACTION_PAUSE, - RECOVERY_TARGET_ACTION_PROMOTE, - RECOVERY_TARGET_ACTION_SHUTDOWN, -} RecoveryTargetAction; - struct LogicalDecodingContext; struct XLogRecordBuffer; diff --git a/src/include/access/xlogrecovery.h b/src/include/access/xlogrecovery.h index 91446303024..8e475e266d1 100644 --- a/src/include/access/xlogrecovery.h +++ b/src/include/access/xlogrecovery.h @@ -40,6 +40,16 @@ typedef enum RECOVERY_TARGET_TIMELINE_NUMERIC, } RecoveryTargetTimeLineGoal; +/* + * Recovery target action. + */ +typedef enum +{ + RECOVERY_TARGET_ACTION_PAUSE, + RECOVERY_TARGET_ACTION_PROMOTE, + RECOVERY_TARGET_ACTION_SHUTDOWN, +} RecoveryTargetAction; + /* Recovery pause states */ typedef enum RecoveryPauseState { diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index a3f3315fed9..5173d422d46 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202507091 +#define CATALOG_VERSION_NO 202507231 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 1fc19146f46..3ee8fed7e53 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -11801,6 +11801,10 @@ proname => 'binary_upgrade_replorigin_advance', proisstrict => 'f', provolatile => 'v', proparallel => 'u', prorettype => 'void', proargtypes => 'text pg_lsn', prosrc => 'binary_upgrade_replorigin_advance' }, +{ oid => '9159', descr => 'for use by pg_upgrade (conflict detection slot)', + proname => 'binary_upgrade_create_conflict_detection_slot', proisstrict => 'f', + provolatile => 'v', proparallel => 'u', prorettype => 'void', + proargtypes => '', prosrc => 'binary_upgrade_create_conflict_detection_slot' }, # conversion functions { oid => '4302', diff --git a/src/include/catalog/pg_subscription.h b/src/include/catalog/pg_subscription.h index 20fc329992d..231ef84ec9a 100644 --- a/src/include/catalog/pg_subscription.h +++ b/src/include/catalog/pg_subscription.h @@ -78,6 +78,9 @@ CATALOG(pg_subscription,6100,SubscriptionRelationId) BKI_SHARED_RELATION BKI_ROW * slots) in the upstream database are enabled * to be synchronized to the standbys. */ + bool subretaindeadtuples; /* True if dead tuples useful for + * conflict detection are retained */ + #ifdef CATALOG_VARLEN /* variable-length fields start here */ /* Connection string to the publisher */ text subconninfo BKI_FORCE_NOT_NULL; @@ -131,6 +134,8 @@ typedef struct Subscription * (i.e. the main slot and the table sync * slots) in the upstream database are enabled * to be synchronized to the standbys. */ + bool retaindeadtuples; /* True if dead tuples useful for conflict + * detection are retained */ char *conninfo; /* Connection string to the publisher */ char *slotname; /* Name of the replication slot */ char *synccommit; /* Synchronous commit setting for worker */ diff --git a/src/include/commands/subscriptioncmds.h b/src/include/commands/subscriptioncmds.h index c2262e46a7f..9b288ad22a6 100644 --- a/src/include/commands/subscriptioncmds.h +++ b/src/include/commands/subscriptioncmds.h @@ -28,4 +28,9 @@ extern void AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId); extern char defGetStreamingMode(DefElem *def); +extern ObjectAddress AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, bool isTopLevel); + +extern void CheckSubDeadTupleRetention(bool check_guc, bool sub_disabled, + int elevel_for_sub_disabled); + #endif /* SUBSCRIPTIONCMDS_H */ diff --git a/src/include/libpq/libpq-be-fe-helpers.h b/src/include/libpq/libpq-be-fe-helpers.h index 16205b824fa..1c4a342090c 100644 --- a/src/include/libpq/libpq-be-fe-helpers.h +++ b/src/include/libpq/libpq-be-fe-helpers.h @@ -30,17 +30,7 @@ #ifndef LIBPQ_BE_FE_HELPERS_H #define LIBPQ_BE_FE_HELPERS_H -/* - * Despite the name, BUILDING_DLL is set only when building code directly part - * of the backend. Which also is where libpq isn't allowed to be - * used. Obviously this doesn't protect against libpq-fe.h getting included - * otherwise, but perhaps still protects against a few mistakes... - */ -#ifdef BUILDING_DLL -#error "libpq may not be used code directly built into the backend" -#endif - -#include "libpq-fe.h" +#include "libpq/libpq-be-fe.h" #include "miscadmin.h" #include "storage/fd.h" #include "storage/latch.h" @@ -289,41 +279,30 @@ libpqsrv_exec_params(PGconn *conn, static inline PGresult * libpqsrv_get_result_last(PGconn *conn, uint32 wait_event_info) { - PGresult *volatile lastResult = NULL; + PGresult *lastResult = NULL; - /* In what follows, do not leak any PGresults on an error. */ - PG_TRY(); + for (;;) { - for (;;) - { - /* Wait for, and collect, the next PGresult. */ - PGresult *result; - - result = libpqsrv_get_result(conn, wait_event_info); - if (result == NULL) - break; /* query is complete, or failure */ + /* Wait for, and collect, the next PGresult. */ + PGresult *result; - /* - * Emulate PQexec()'s behavior of returning the last result when - * there are many. - */ - PQclear(lastResult); - lastResult = result; + result = libpqsrv_get_result(conn, wait_event_info); + if (result == NULL) + break; /* query is complete, or failure */ - if (PQresultStatus(lastResult) == PGRES_COPY_IN || - PQresultStatus(lastResult) == PGRES_COPY_OUT || - PQresultStatus(lastResult) == PGRES_COPY_BOTH || - PQstatus(conn) == CONNECTION_BAD) - break; - } - } - PG_CATCH(); - { + /* + * Emulate PQexec()'s behavior of returning the last result when there + * are many. + */ PQclear(lastResult); - PG_RE_THROW(); - } - PG_END_TRY(); + lastResult = result; + if (PQresultStatus(lastResult) == PGRES_COPY_IN || + PQresultStatus(lastResult) == PGRES_COPY_OUT || + PQresultStatus(lastResult) == PGRES_COPY_BOTH || + PQstatus(conn) == CONNECTION_BAD) + break; + } return lastResult; } @@ -454,4 +433,45 @@ exit: ; return error; } +/* + * libpqsrv_notice_receiver + * + * Custom notice receiver for libpq connections. + * + * This function is intended to be set via PQsetNoticeReceiver() so that + * NOTICE, WARNING, and similar messages from the connection are reported via + * ereport(), instead of being printed to stderr. + * + * Because this will be called from libpq with a "real" (not wrapped) + * PGresult, we need to temporarily ignore libpq-be-fe.h's wrapper macros + * for PGresult and also PQresultErrorMessage, and put back the wrappers + * afterwards. That's not pretty, but there seems no better alternative. + */ +#undef PGresult +#undef PQresultErrorMessage + +static inline void +libpqsrv_notice_receiver(void *arg, const PGresult *res) +{ + const char *message; + int len; + const char *prefix = (const char *) arg; + + /* + * Trim the trailing newline from the message text returned from + * PQresultErrorMessage(), as it always includes one, to produce cleaner + * log output. + */ + message = PQresultErrorMessage(res); + len = strlen(message); + if (len > 0 && message[len - 1] == '\n') + len--; + + ereport(LOG, + errmsg_internal("%s: %.*s", prefix, len, message)); +} + +#define PGresult libpqsrv_PGresult +#define PQresultErrorMessage libpqsrv_PQresultErrorMessage + #endif /* LIBPQ_BE_FE_HELPERS_H */ diff --git a/src/include/libpq/libpq-be-fe.h b/src/include/libpq/libpq-be-fe.h new file mode 100644 index 00000000000..e3f796b0230 --- /dev/null +++ b/src/include/libpq/libpq-be-fe.h @@ -0,0 +1,259 @@ +/*------------------------------------------------------------------------- + * + * libpq-be-fe.h + * Wrapper functions for using libpq in extensions + * + * Code built directly into the backend is not allowed to link to libpq + * directly. Extension code is allowed to use libpq however. One of the + * main risks in doing so is leaking the malloc-allocated structures + * returned by libpq, causing a process-lifespan memory leak. + * + * This file provides wrapper objects to help in building memory-safe code. + * A PGresult object wrapped this way acts much as if it were palloc'd: + * it will go away when the specified context is reset or deleted. + * We might later extend the concept to other objects such as PGconns. + * + * See also the libpq-be-fe-helpers.h file, which provides additional + * facilities built on top of this one. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/libpq/libpq-be-fe.h + * + *------------------------------------------------------------------------- + */ +#ifndef LIBPQ_BE_FE_H +#define LIBPQ_BE_FE_H + +/* + * Despite the name, BUILDING_DLL is set only when building code directly part + * of the backend. Which also is where libpq isn't allowed to be + * used. Obviously this doesn't protect against libpq-fe.h getting included + * otherwise, but perhaps still protects against a few mistakes... + */ +#ifdef BUILDING_DLL +#error "libpq may not be used in code directly built into the backend" +#endif + +#include "libpq-fe.h" + +/* + * Memory-context-safe wrapper object for a PGresult. + */ +typedef struct libpqsrv_PGresult +{ + PGresult *res; /* the wrapped PGresult */ + MemoryContext ctx; /* the MemoryContext it's attached to */ + MemoryContextCallback cb; /* the callback that implements freeing */ +} libpqsrv_PGresult; + + +/* + * Wrap the given PGresult in a libpqsrv_PGresult object, so that it will + * go away automatically if the current memory context is reset or deleted. + * + * To avoid potential memory leaks, backend code must always apply this + * immediately to the output of any PGresult-yielding libpq function. + */ +static inline libpqsrv_PGresult * +libpqsrv_PQwrap(PGresult *res) +{ + libpqsrv_PGresult *bres; + MemoryContext ctx = CurrentMemoryContext; + + /* We pass through a NULL result as-is, since there's nothing to free */ + if (res == NULL) + return NULL; + /* Attempt to allocate the wrapper ... this had better not throw error */ + bres = (libpqsrv_PGresult *) + MemoryContextAllocExtended(ctx, + sizeof(libpqsrv_PGresult), + MCXT_ALLOC_NO_OOM); + /* If we failed to allocate a wrapper, free the PGresult before failing */ + if (bres == NULL) + { + PQclear(res); + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + } + /* Okay, set up the wrapper */ + bres->res = res; + bres->ctx = ctx; + bres->cb.func = (MemoryContextCallbackFunction) PQclear; + bres->cb.arg = res; + MemoryContextRegisterResetCallback(ctx, &bres->cb); + return bres; +} + +/* + * Free a wrapped PGresult, after detaching it from the memory context. + * Like PQclear(), allow the argument to be NULL. + */ +static inline void +libpqsrv_PQclear(libpqsrv_PGresult *bres) +{ + if (bres) + { + MemoryContextUnregisterResetCallback(bres->ctx, &bres->cb); + PQclear(bres->res); + pfree(bres); + } +} + +/* + * Move a wrapped PGresult to have a different parent context. + */ +static inline libpqsrv_PGresult * +libpqsrv_PGresultSetParent(libpqsrv_PGresult *bres, MemoryContext ctx) +{ + libpqsrv_PGresult *newres; + + /* We pass through a NULL result as-is */ + if (bres == NULL) + return NULL; + /* Make a new wrapper in the target context, raising error on OOM */ + newres = (libpqsrv_PGresult *) + MemoryContextAlloc(ctx, sizeof(libpqsrv_PGresult)); + /* Okay, set up the new wrapper */ + newres->res = bres->res; + newres->ctx = ctx; + newres->cb.func = (MemoryContextCallbackFunction) PQclear; + newres->cb.arg = bres->res; + MemoryContextRegisterResetCallback(ctx, &newres->cb); + /* Disarm and delete the old wrapper */ + MemoryContextUnregisterResetCallback(bres->ctx, &bres->cb); + pfree(bres); + return newres; +} + +/* + * Convenience wrapper for PQgetResult. + * + * We could supply wrappers for other PGresult-returning functions too, + * but at present there's no need. + */ +static inline libpqsrv_PGresult * +libpqsrv_PQgetResult(PGconn *conn) +{ + return libpqsrv_PQwrap(PQgetResult(conn)); +} + +/* + * Accessor functions for libpqsrv_PGresult. While it's not necessary to use + * these, they emulate the behavior of the underlying libpq functions when + * passed a NULL pointer. This is particularly important for PQresultStatus, + * which is often the first check on a result. + */ + +static inline ExecStatusType +libpqsrv_PQresultStatus(const libpqsrv_PGresult *res) +{ + if (!res) + return PGRES_FATAL_ERROR; + return PQresultStatus(res->res); +} + +static inline const char * +libpqsrv_PQresultErrorMessage(const libpqsrv_PGresult *res) +{ + if (!res) + return ""; + return PQresultErrorMessage(res->res); +} + +static inline char * +libpqsrv_PQresultErrorField(const libpqsrv_PGresult *res, int fieldcode) +{ + if (!res) + return NULL; + return PQresultErrorField(res->res, fieldcode); +} + +static inline char * +libpqsrv_PQcmdStatus(const libpqsrv_PGresult *res) +{ + if (!res) + return NULL; + return PQcmdStatus(res->res); +} + +static inline int +libpqsrv_PQntuples(const libpqsrv_PGresult *res) +{ + if (!res) + return 0; + return PQntuples(res->res); +} + +static inline int +libpqsrv_PQnfields(const libpqsrv_PGresult *res) +{ + if (!res) + return 0; + return PQnfields(res->res); +} + +static inline char * +libpqsrv_PQgetvalue(const libpqsrv_PGresult *res, int tup_num, int field_num) +{ + if (!res) + return NULL; + return PQgetvalue(res->res, tup_num, field_num); +} + +static inline int +libpqsrv_PQgetlength(const libpqsrv_PGresult *res, int tup_num, int field_num) +{ + if (!res) + return 0; + return PQgetlength(res->res, tup_num, field_num); +} + +static inline int +libpqsrv_PQgetisnull(const libpqsrv_PGresult *res, int tup_num, int field_num) +{ + if (!res) + return 1; /* pretend it is null */ + return PQgetisnull(res->res, tup_num, field_num); +} + +static inline char * +libpqsrv_PQfname(const libpqsrv_PGresult *res, int field_num) +{ + if (!res) + return NULL; + return PQfname(res->res, field_num); +} + +static inline const char * +libpqsrv_PQcmdTuples(const libpqsrv_PGresult *res) +{ + if (!res) + return ""; + return PQcmdTuples(res->res); +} + +/* + * Redefine these libpq entry point names concerned with PGresults so that + * they will operate on libpqsrv_PGresults instead. This avoids needing to + * convert a lot of pre-existing code, and reduces the notational differences + * between frontend and backend libpq-using code. + */ +#define PGresult libpqsrv_PGresult +#define PQclear libpqsrv_PQclear +#define PQgetResult libpqsrv_PQgetResult +#define PQresultStatus libpqsrv_PQresultStatus +#define PQresultErrorMessage libpqsrv_PQresultErrorMessage +#define PQresultErrorField libpqsrv_PQresultErrorField +#define PQcmdStatus libpqsrv_PQcmdStatus +#define PQntuples libpqsrv_PQntuples +#define PQnfields libpqsrv_PQnfields +#define PQgetvalue libpqsrv_PQgetvalue +#define PQgetlength libpqsrv_PQgetlength +#define PQgetisnull libpqsrv_PQgetisnull +#define PQfname libpqsrv_PQfname +#define PQcmdTuples libpqsrv_PQcmdTuples + +#endif /* LIBPQ_BE_FE_H */ diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 6567759595d..e5dd15098f6 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -179,6 +179,9 @@ typedef struct PlannerGlobal /* partition descriptors */ PartitionDirectory partition_directory pg_node_attr(read_write_ignore); + + /* hash table for NOT NULL attnums of relations */ + struct HTAB *rel_notnullatts_hash pg_node_attr(read_write_ignore); } PlannerGlobal; /* macro for fetching the Plan associated with a SubPlan node */ @@ -719,6 +722,9 @@ typedef struct PartitionSchemeData *PartitionScheme; * the attribute is needed as part of final targetlist * attr_widths - cache space for per-attribute width estimates; * zero means not computed yet + * notnullattnums - zero-based set containing attnums of NOT NULL + * columns (not populated for rels corresponding to + * non-partitioned inh==true RTEs) * nulling_relids - relids of outer joins that can null this rel * lateral_vars - lateral cross-references of rel, if any (list of * Vars and PlaceHolderVars) @@ -952,11 +958,7 @@ typedef struct RelOptInfo Relids *attr_needed pg_node_attr(read_write_ignore); /* array indexed [min_attr .. max_attr] */ int32 *attr_widths pg_node_attr(read_write_ignore); - - /* - * Zero-based set containing attnums of NOT NULL columns. Not populated - * for rels corresponding to non-partitioned inh==true RTEs. - */ + /* zero-based set containing attnums of NOT NULL columns */ Bitmapset *notnullattnums; /* relids of outer joins that can null this baserel */ Relids nulling_relids; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 4f59e30d62d..46e2e09ea35 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -29,6 +29,20 @@ */ /* ---------------- + * CachedPlanType + * + * CachedPlanType identifies whether a PlannedStmt is a cached plan, and if + * so, whether it is generic or custom. + * ---------------- + */ +typedef enum CachedPlanType +{ + PLAN_CACHE_NONE = 0, /* Not a cached plan */ + PLAN_CACHE_GENERIC, /* Generic cached plan */ + PLAN_CACHE_CUSTOM, /* Custom cached plan */ +} CachedPlanType; + +/* ---------------- * PlannedStmt node * * The output of the planner is a Plan tree headed by a PlannedStmt node. @@ -58,6 +72,9 @@ typedef struct PlannedStmt /* plan identifier (can be set by plugins) */ int64 planId; + /* type of cached plan */ + CachedPlanType cached_plan_type; + /* is it insert|update|delete|merge RETURNING? */ bool hasReturning; diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 546828b54bd..37bc13c2cbd 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -154,6 +154,8 @@ extern Node *estimate_expression_value(PlannerInfo *root, Node *node); extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, Oid result_collation); +extern bool var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info); + extern List *expand_function_arguments(List *args, bool include_out_arguments, Oid result_type, struct HeapTupleData *func_tuple); diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h index cd74e4b1e8b..d6f6f4ad2d7 100644 --- a/src/include/optimizer/plancat.h +++ b/src/include/optimizer/plancat.h @@ -28,6 +28,10 @@ extern PGDLLIMPORT get_relation_info_hook_type get_relation_info_hook; extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, RelOptInfo *rel); +extern void get_relation_notnullatts(PlannerInfo *root, Relation relation); + +extern Relids find_relation_notnullatts(PlannerInfo *root, Oid relid); + extern List *infer_arbiter_indexes(PlannerInfo *root); extern void estimate_rel_size(Relation rel, int32 *attr_widths, diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h index df56202777c..4fbecdb4462 100644 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -22,10 +22,10 @@ * prototypes for prepjointree.c */ extern void transform_MERGE_to_join(Query *parse); +extern Query *preprocess_relation_rtes(PlannerInfo *root); extern void replace_empty_jointree(Query *parse); extern void pull_up_sublinks(PlannerInfo *root); extern void preprocess_function_rtes(PlannerInfo *root); -extern Query *expand_virtual_generated_columns(PlannerInfo *root); extern void pull_up_subqueries(PlannerInfo *root); extern void flatten_simple_union_all(PlannerInfo *root); extern void reduce_outer_joins(PlannerInfo *root); diff --git a/src/include/port/solaris.h b/src/include/port/solaris.h index e63a3bd824d..8ff40007c7f 100644 --- a/src/include/port/solaris.h +++ b/src/include/port/solaris.h @@ -24,3 +24,12 @@ #if defined(__i386__) #include <sys/isa_defs.h> #endif + +/* + * On original Solaris, PAM conversation procs lack a "const" in their + * declaration; but recent OpenIndiana versions put it there by default. + * The least messy way to deal with this is to define _PAM_LEGACY_NONCONST, + * which causes OpenIndiana to declare pam_conv per the Solaris tradition, + * and also use that symbol to control omitting the "const" in our own code. + */ +#define _PAM_LEGACY_NONCONST 1 diff --git a/src/include/replication/logicallauncher.h b/src/include/replication/logicallauncher.h index 82b202f3305..b29453e8e4f 100644 --- a/src/include/replication/logicallauncher.h +++ b/src/include/replication/logicallauncher.h @@ -25,8 +25,11 @@ extern void ApplyLauncherShmemInit(void); extern void ApplyLauncherForgetWorkerStartTime(Oid subid); extern void ApplyLauncherWakeupAtCommit(void); +extern void ApplyLauncherWakeup(void); extern void AtEOXact_ApplyLauncher(bool isCommit); +extern void CreateConflictDetectionSlot(void); + extern bool IsLogicalLauncher(void); extern pid_t GetLeaderApplyWorkerPid(pid_t pid); diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index 76aeeb92242..e8fc342d1a9 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -21,6 +21,13 @@ #define PG_REPLSLOT_DIR "pg_replslot" /* + * The reserved name for a replication slot used to retain dead tuples for + * conflict detection in logical replication. See + * maybe_advance_nonremovable_xid() for detail. + */ +#define CONFLICT_DETECTION_SLOT "pg_conflict_detection" + +/* * Behaviour of replication slots, upon release or crash. * * Slots marked as PERSISTENT are crash-safe and will not be dropped when @@ -220,6 +227,25 @@ typedef struct ReplicationSlot * Latest restart_lsn that has been flushed to disk. For persistent slots * the flushed LSN should be taken into account when calculating the * oldest LSN for WAL segments removal. + * + * Do not assume that restart_lsn will always move forward, i.e., that the + * previously flushed restart_lsn is always behind data.restart_lsn. In + * streaming replication using a physical slot, the restart_lsn is updated + * based on the flushed WAL position reported by the walreceiver. + * + * This replication mode allows duplicate WAL records to be received and + * overwritten. If the walreceiver receives older WAL records and then + * reports them as flushed to the walsender, the restart_lsn may appear to + * move backward. + * + * This typically occurs at the beginning of replication. One reason is + * that streaming replication starts at the beginning of a segment, so, if + * restart_lsn is in the middle of a segment, it will be updated to an + * earlier LSN, see RequestXLogStreaming. Another reason is that the + * walreceiver chooses its startpoint based on the replayed LSN, so, if + * some records have been received but not yet applied, they will be + * received again and leads to updating the restart_lsn to an earlier + * position. */ XLogRecPtr last_saved_restart_lsn; @@ -292,7 +318,9 @@ extern void ReplicationSlotMarkDirty(void); /* misc stuff */ extern void ReplicationSlotInitialize(void); -extern bool ReplicationSlotValidateName(const char *name, int elevel); +extern bool ReplicationSlotValidateName(const char *name, + bool allow_reserved_name, + int elevel); extern void ReplicationSlotReserveWal(void); extern void ReplicationSlotsComputeRequiredXmin(bool already_locked); extern void ReplicationSlotsComputeRequiredLSN(void); diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h index 30b2775952c..0c7b8440a61 100644 --- a/src/include/replication/worker_internal.h +++ b/src/include/replication/worker_internal.h @@ -86,6 +86,16 @@ typedef struct LogicalRepWorker /* Indicates whether apply can be performed in parallel. */ bool parallel_apply; + /* + * The changes made by this and later transactions must be retained to + * ensure reliable conflict detection during the apply phase. + * + * The logical replication launcher manages an internal replication slot + * named "pg_conflict_detection". It asynchronously collects this ID to + * decide when to advance the xmin value of the slot. + */ + TransactionId oldest_nonremovable_xid; + /* Stats. */ XLogRecPtr last_lsn; TimestampTz last_send_time; @@ -245,7 +255,8 @@ extern List *logicalrep_workers_find(Oid subid, bool only_running, extern bool logicalrep_worker_launch(LogicalRepWorkerType wtype, Oid dbid, Oid subid, const char *subname, Oid userid, Oid relid, - dsm_handle subworker_dsm); + dsm_handle subworker_dsm, + bool retain_dead_tuples); extern void logicalrep_worker_stop(Oid subid, Oid relid); extern void logicalrep_pa_worker_stop(ParallelApplyWorkerInfo *winfo); extern void logicalrep_worker_wakeup(Oid subid, Oid relid); diff --git a/src/include/storage/aio.h b/src/include/storage/aio.h index e7a0a234b6c..2933eea0649 100644 --- a/src/include/storage/aio.h +++ b/src/include/storage/aio.h @@ -201,7 +201,7 @@ typedef enum PgAioHandleCallbackID } PgAioHandleCallbackID; #define PGAIO_HCB_MAX PGAIO_HCB_LOCAL_BUFFER_READV -StaticAssertDecl(PGAIO_HCB_MAX <= (1 << PGAIO_RESULT_ID_BITS), +StaticAssertDecl(PGAIO_HCB_MAX < (1 << PGAIO_RESULT_ID_BITS), "PGAIO_HCB_MAX is too big for PGAIO_RESULT_ID_BITS"); diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 08a72569ae5..5e717765764 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -176,51 +176,23 @@ extern void LWLockInitialize(LWLock *lock, int tranche_id); * Every tranche ID less than NUM_INDIVIDUAL_LWLOCKS is reserved; also, * we reserve additional tranche IDs for builtin tranches not included in * the set of individual LWLocks. A call to LWLockNewTrancheId will never - * return a value less than LWTRANCHE_FIRST_USER_DEFINED. + * return a value less than LWTRANCHE_FIRST_USER_DEFINED. The actual list of + * built-in tranches is kept in lwlocklist.h. */ typedef enum BuiltinTrancheIds { - LWTRANCHE_XACT_BUFFER = NUM_INDIVIDUAL_LWLOCKS, - LWTRANCHE_COMMITTS_BUFFER, - LWTRANCHE_SUBTRANS_BUFFER, - LWTRANCHE_MULTIXACTOFFSET_BUFFER, - LWTRANCHE_MULTIXACTMEMBER_BUFFER, - LWTRANCHE_NOTIFY_BUFFER, - LWTRANCHE_SERIAL_BUFFER, - LWTRANCHE_WAL_INSERT, - LWTRANCHE_BUFFER_CONTENT, - LWTRANCHE_REPLICATION_ORIGIN_STATE, - LWTRANCHE_REPLICATION_SLOT_IO, - LWTRANCHE_LOCK_FASTPATH, - LWTRANCHE_BUFFER_MAPPING, - LWTRANCHE_LOCK_MANAGER, - LWTRANCHE_PREDICATE_LOCK_MANAGER, - LWTRANCHE_PARALLEL_HASH_JOIN, - LWTRANCHE_PARALLEL_BTREE_SCAN, - LWTRANCHE_PARALLEL_QUERY_DSA, - LWTRANCHE_PER_SESSION_DSA, - LWTRANCHE_PER_SESSION_RECORD_TYPE, - LWTRANCHE_PER_SESSION_RECORD_TYPMOD, - LWTRANCHE_SHARED_TUPLESTORE, - LWTRANCHE_SHARED_TIDBITMAP, - LWTRANCHE_PARALLEL_APPEND, - LWTRANCHE_PER_XACT_PREDICATE_LIST, - LWTRANCHE_PGSTATS_DSA, - LWTRANCHE_PGSTATS_HASH, - LWTRANCHE_PGSTATS_DATA, - LWTRANCHE_LAUNCHER_DSA, - LWTRANCHE_LAUNCHER_HASH, - LWTRANCHE_DSM_REGISTRY_DSA, - LWTRANCHE_DSM_REGISTRY_HASH, - LWTRANCHE_COMMITTS_SLRU, - LWTRANCHE_MULTIXACTMEMBER_SLRU, - LWTRANCHE_MULTIXACTOFFSET_SLRU, - LWTRANCHE_NOTIFY_SLRU, - LWTRANCHE_SERIAL_SLRU, - LWTRANCHE_SUBTRANS_SLRU, - LWTRANCHE_XACT_SLRU, - LWTRANCHE_PARALLEL_VACUUM_DSA, - LWTRANCHE_AIO_URING_COMPLETION, + /* + * LWTRANCHE_INVALID is an unused value that only exists to initialize the + * rest of the tranches to appropriate values. + */ + LWTRANCHE_INVALID = NUM_INDIVIDUAL_LWLOCKS - 1, + +#define PG_LWLOCK(id, name) +#define PG_LWLOCKTRANCHE(id, name) LWTRANCHE_##id, +#include "storage/lwlocklist.h" +#undef PG_LWLOCK +#undef PG_LWLOCKTRANCHE + LWTRANCHE_FIRST_USER_DEFINED, } BuiltinTrancheIds; diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h index a9681738146..208d2e3a8ed 100644 --- a/src/include/storage/lwlocklist.h +++ b/src/include/storage/lwlocklist.h @@ -2,9 +2,10 @@ * * lwlocklist.h * - * The predefined LWLock list is kept in its own source file for use by - * automatic tools. The exact representation of a keyword is determined by - * the PG_LWLOCK macro, which is not defined in this file; it can be + * The list of predefined LWLocks and built-in LWLock tranches is kept in + * its own source file for use by automatic tools. The exact + * representation of a keyword is determined by the PG_LWLOCK and + * PG_LWLOCKTRANCHE macros, which are not defined in this file; they can be * defined by the caller for special purposes. * * Also, generate-lwlocknames.pl processes this file to create lwlocknames.h. @@ -84,3 +85,53 @@ PG_LWLOCK(50, DSMRegistry) PG_LWLOCK(51, InjectionPoint) PG_LWLOCK(52, SerialControl) PG_LWLOCK(53, AioWorkerSubmissionQueue) + +/* + * There also exist several built-in LWLock tranches. As with the predefined + * LWLocks, be sure to update the WaitEventLWLock section of + * src/backend/utils/activity/wait_event_names.txt when modifying this list. + * + * Note that the IDs here (the first value) don't include the LWTRANCHE_ + * prefix. It's added elsewhere. + */ +PG_LWLOCKTRANCHE(XACT_BUFFER, XactBuffer) +PG_LWLOCKTRANCHE(COMMITTS_BUFFER, CommitTsBuffer) +PG_LWLOCKTRANCHE(SUBTRANS_BUFFER, SubtransBuffer) +PG_LWLOCKTRANCHE(MULTIXACTOFFSET_BUFFER, MultiXactOffsetBuffer) +PG_LWLOCKTRANCHE(MULTIXACTMEMBER_BUFFER, MultiXactMemberBuffer) +PG_LWLOCKTRANCHE(NOTIFY_BUFFER, NotifyBuffer) +PG_LWLOCKTRANCHE(SERIAL_BUFFER, SerialBuffer) +PG_LWLOCKTRANCHE(WAL_INSERT, WALInsert) +PG_LWLOCKTRANCHE(BUFFER_CONTENT, BufferContent) +PG_LWLOCKTRANCHE(REPLICATION_ORIGIN_STATE, ReplicationOriginState) +PG_LWLOCKTRANCHE(REPLICATION_SLOT_IO, ReplicationSlotIO) +PG_LWLOCKTRANCHE(LOCK_FASTPATH, LockFastPath) +PG_LWLOCKTRANCHE(BUFFER_MAPPING, BufferMapping) +PG_LWLOCKTRANCHE(LOCK_MANAGER, LockManager) +PG_LWLOCKTRANCHE(PREDICATE_LOCK_MANAGER, PredicateLockManager) +PG_LWLOCKTRANCHE(PARALLEL_HASH_JOIN, ParallelHashJoin) +PG_LWLOCKTRANCHE(PARALLEL_BTREE_SCAN, ParallelBtreeScan) +PG_LWLOCKTRANCHE(PARALLEL_QUERY_DSA, ParallelQueryDSA) +PG_LWLOCKTRANCHE(PER_SESSION_DSA, PerSessionDSA) +PG_LWLOCKTRANCHE(PER_SESSION_RECORD_TYPE, PerSessionRecordType) +PG_LWLOCKTRANCHE(PER_SESSION_RECORD_TYPMOD, PerSessionRecordTypmod) +PG_LWLOCKTRANCHE(SHARED_TUPLESTORE, SharedTupleStore) +PG_LWLOCKTRANCHE(SHARED_TIDBITMAP, SharedTidBitmap) +PG_LWLOCKTRANCHE(PARALLEL_APPEND, ParallelAppend) +PG_LWLOCKTRANCHE(PER_XACT_PREDICATE_LIST, PerXactPredicateList) +PG_LWLOCKTRANCHE(PGSTATS_DSA, PgStatsDSA) +PG_LWLOCKTRANCHE(PGSTATS_HASH, PgStatsHash) +PG_LWLOCKTRANCHE(PGSTATS_DATA, PgStatsData) +PG_LWLOCKTRANCHE(LAUNCHER_DSA, LogicalRepLauncherDSA) +PG_LWLOCKTRANCHE(LAUNCHER_HASH, LogicalRepLauncherHash) +PG_LWLOCKTRANCHE(DSM_REGISTRY_DSA, DSMRegistryDSA) +PG_LWLOCKTRANCHE(DSM_REGISTRY_HASH, DSMRegistryHash) +PG_LWLOCKTRANCHE(COMMITTS_SLRU, CommitTsSLRU) +PG_LWLOCKTRANCHE(MULTIXACTOFFSET_SLRU, MultiXactOffsetSLRU) +PG_LWLOCKTRANCHE(MULTIXACTMEMBER_SLRU, MultiXactMemberSLRU) +PG_LWLOCKTRANCHE(NOTIFY_SLRU, NotifySLRU) +PG_LWLOCKTRANCHE(SERIAL_SLRU, SerialSLRU) +PG_LWLOCKTRANCHE(SUBTRANS_SLRU, SubtransSLRU) +PG_LWLOCKTRANCHE(XACT_SLRU, XactSLRU) +PG_LWLOCKTRANCHE(PARALLEL_VACUUM_DSA, ParallelVacuumDSA) +PG_LWLOCKTRANCHE(AIO_URING_COMPLETION, AioUringCompletion) diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 9f9b3fcfbf1..c6f5ebceefd 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -130,9 +130,17 @@ extern PGDLLIMPORT int FastPathLockGroupsPerBackend; * the checkpoint are actually destroyed on disk. Replay can cope with a file * or block that doesn't exist, but not with a block that has the wrong * contents. + * + * Setting DELAY_CHKPT_IN_COMMIT is similar to setting DELAY_CHKPT_START, but + * it explicitly indicates that the reason for delaying the checkpoint is due + * to a transaction being within a critical commit section. We need this new + * flag to ensure all the transactions that have acquired commit timestamp are + * finished before we allow the logical replication client to advance its xid + * which is used to hold back dead rows for conflict detection. */ #define DELAY_CHKPT_START (1<<0) #define DELAY_CHKPT_COMPLETE (1<<1) +#define DELAY_CHKPT_IN_COMMIT (DELAY_CHKPT_START | 1<<2) typedef enum { diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index e4877d88e8f..2f4ae06c279 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -55,7 +55,8 @@ extern RunningTransactions GetRunningTransactionData(void); extern bool TransactionIdIsInProgress(TransactionId xid); extern TransactionId GetOldestNonRemovableTransactionId(Relation rel); extern TransactionId GetOldestTransactionIdConsideredRunning(void); -extern TransactionId GetOldestActiveTransactionId(void); +extern TransactionId GetOldestActiveTransactionId(bool inCommitOnly, + bool allDbs); extern TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly); extern void GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin); diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h index e1b42267b22..039b9cba61a 100644 --- a/src/include/utils/palloc.h +++ b/src/include/utils/palloc.h @@ -133,6 +133,8 @@ MemoryContextSwitchTo(MemoryContext context) /* Registration of memory context reset/delete callbacks */ extern void MemoryContextRegisterResetCallback(MemoryContext context, MemoryContextCallback *cb); +extern void MemoryContextUnregisterResetCallback(MemoryContext context, + MemoryContextCallback *cb); /* * These are like standard strdup() except the copied string is diff --git a/src/include/utils/pgstat_kind.h b/src/include/utils/pgstat_kind.h index f44169fd5a3..eb5f0b3ae6d 100644 --- a/src/include/utils/pgstat_kind.h +++ b/src/include/utils/pgstat_kind.h @@ -18,7 +18,7 @@ /* Range of IDs allowed, for built-in and custom kinds */ #define PGSTAT_KIND_MIN 1 /* Minimum ID allowed */ -#define PGSTAT_KIND_MAX 256 /* Maximum ID allowed */ +#define PGSTAT_KIND_MAX 32 /* Maximum ID allowed */ /* use 0 for INVALID, to catch zero-initialized data */ #define PGSTAT_KIND_INVALID 0 @@ -46,7 +46,7 @@ /* Custom stats kinds */ /* Range of IDs allowed for custom stats kinds */ -#define PGSTAT_KIND_CUSTOM_MIN 128 +#define PGSTAT_KIND_CUSTOM_MIN 24 #define PGSTAT_KIND_CUSTOM_MAX PGSTAT_KIND_MAX #define PGSTAT_KIND_CUSTOM_SIZE (PGSTAT_KIND_CUSTOM_MAX - PGSTAT_KIND_CUSTOM_MIN + 1) @@ -55,7 +55,7 @@ * development and have not reserved their own unique kind ID yet. See: * https://wiki.postgresql.org/wiki/CustomCumulativeStats */ -#define PGSTAT_KIND_EXPERIMENTAL 128 +#define PGSTAT_KIND_EXPERIMENTAL 24 static inline bool pgstat_is_kind_builtin(PgStat_Kind kind) |