diff options
| author | Noah Misch | 2020-03-21 16:38:26 +0000 |
|---|---|---|
| committer | Noah Misch | 2020-03-21 16:38:34 +0000 |
| commit | 03b89f1949a99797ed21b0dabdf0aba1bbee8d0b (patch) | |
| tree | d599b3db2c246f4b263c54325458f301ed5e965e /src/include | |
| parent | ae86e46c3b7b90903d89b65385e2b77d2c1cf63e (diff) | |
Skip WAL for new relfilenodes, under wal_level=minimal.
Until now, only selected bulk operations (e.g. COPY) did this. If a
given relfilenode received both a WAL-skipping COPY and a WAL-logged
operation (e.g. INSERT), recovery could lose tuples from the COPY. See
src/backend/access/transam/README section "Skipping WAL for New
RelFileNode" for the new coding rules. Maintainers of table access
methods should examine that section.
To maintain data durability, just before commit, we choose between an
fsync of the relfilenode and copying its contents to WAL. A new GUC,
wal_skip_threshold, guides that choice. If this change slows a workload
that creates small, permanent relfilenodes under wal_level=minimal, try
adjusting wal_skip_threshold. Users setting a timeout on COMMIT may
need to adjust that timeout, and log_min_duration_statement analysis
will reflect time consumption moving to COMMIT from commands like COPY.
Internally, this requires a reliable determination of whether
RollbackAndReleaseCurrentSubTransaction() would unlink a relation's
current relfilenode. Introduce rd_firstRelfilenodeSubid. Amend the
specification of rd_createSubid such that the field is zero when a new
rel has an old rd_node. Make relcache.c retain entries for certain
dropped relations until end of transaction.
Back-patch to 9.5 (all supported versions). This introduces a new WAL
record type, XLOG_GIST_ASSIGN_LSN, without bumping XLOG_PAGE_MAGIC. As
always, update standby systems before master systems. This changes
sizeof(RelationData) and sizeof(IndexStmt), breaking binary
compatibility for affected extensions. (The most recent commit to
affect the same class of extensions was
089e4d405d0f3b94c74a2c6a54357a84a681754b.)
Kyotaro Horiguchi, reviewed (in earlier, similar versions) by Robert
Haas. Heikki Linnakangas and Michael Paquier implemented earlier
designs that materially clarified the problem. Reviewed, in earlier
designs, by Andrew Dunstan, Andres Freund, Alvaro Herrera, Tom Lane,
Fujii Masao, and Simon Riggs. Reported by Martijn van Oosterhout.
Discussion: https://postgr.es/m/20150702220524.GA9392@svana.org
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/access/gist_private.h | 2 | ||||
| -rw-r--r-- | src/include/access/gistxlog.h | 1 | ||||
| -rw-r--r-- | src/include/access/heapam.h | 1 | ||||
| -rw-r--r-- | src/include/access/rewriteheap.h | 2 | ||||
| -rw-r--r-- | src/include/catalog/storage.h | 6 | ||||
| -rw-r--r-- | src/include/nodes/parsenodes.h | 3 | ||||
| -rw-r--r-- | src/include/storage/bufmgr.h | 4 | ||||
| -rw-r--r-- | src/include/storage/lock.h | 3 | ||||
| -rw-r--r-- | src/include/storage/smgr.h | 1 | ||||
| -rw-r--r-- | src/include/utils/rel.h | 57 | ||||
| -rw-r--r-- | src/include/utils/relcache.h | 8 |
11 files changed, 71 insertions, 17 deletions
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 41f496c2580..355572d3f7e 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -425,6 +425,8 @@ extern XLogRecPtr gistXLogSplit(bool page_is_leaf, BlockNumber origrlink, GistNSN oldnsn, Buffer leftchild, bool markfollowright); +extern XLogRecPtr gistXLogAssignLSN(void); + /* gistget.c */ extern bool gistgettuple(IndexScanDesc scan, ScanDirection dir); extern int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h index 1a2b9496d0d..9ff3dee5198 100644 --- a/src/include/access/gistxlog.h +++ b/src/include/access/gistxlog.h @@ -23,6 +23,7 @@ /* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */ #define XLOG_GIST_CREATE_INDEX 0x50 /* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */ +#define XLOG_GIST_ASSIGN_LSN 0x70 /* nop, assign new LSN */ /* * Backup Blk 0: updated page. diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 40e153f71ad..96b8466a204 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -25,7 +25,6 @@ /* "options" flag bits for heap_insert */ -#define HEAP_INSERT_SKIP_WAL 0x0001 #define HEAP_INSERT_SKIP_FSM 0x0002 #define HEAP_INSERT_FROZEN 0x0004 #define HEAP_INSERT_SPECULATIVE 0x0008 diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h index 6d7f669cbca..c3cc36897bc 100644 --- a/src/include/access/rewriteheap.h +++ b/src/include/access/rewriteheap.h @@ -23,7 +23,7 @@ typedef struct RewriteStateData *RewriteState; extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap, TransactionId OldestXmin, TransactionId FreezeXid, - MultiXactId MultiXactCutoff, bool use_wal); + MultiXactId MultiXactCutoff); extern void end_heap_rewrite(RewriteState state); extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple, HeapTuple newTuple); diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h index ef52d858038..cb7a42ba395 100644 --- a/src/include/catalog/storage.h +++ b/src/include/catalog/storage.h @@ -18,16 +18,22 @@ #include "storage/relfilenode.h" #include "utils/relcache.h" +/* GUC variables */ +extern int wal_skip_threshold; + extern void RelationCreateStorage(RelFileNode rnode, char relpersistence); extern void RelationDropStorage(Relation rel); extern void RelationPreserveStorage(RelFileNode rnode, bool atCommit); +extern void RelationPreTruncate(Relation rel); extern void RelationTruncate(Relation rel, BlockNumber nblocks); +extern bool RelFileNodeSkippingWAL(RelFileNode rnode); /* * These functions used to be in storage/smgr/smgr.c, which explains the * naming */ extern void smgrDoPendingDeletes(bool isCommit); +extern void smgrDoPendingSyncs(bool isCommit); extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr); extern void AtSubCommit_smgr(void); extern void AtSubAbort_smgr(void); diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 62d3683be5e..42acb722ae3 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2738,6 +2738,9 @@ typedef struct IndexStmt bool transformed; /* true when transformIndexStmt is finished */ bool concurrent; /* should this be a concurrent index build? */ bool if_not_exists; /* just do nothing if index already exists? */ + SubTransactionId oldCreateSubid; /* rd_createSubid of oldNode */ + SubTransactionId oldFirstRelfilenodeSubid; /* rd_firstRelfilenodeSubid of + * oldNode */ } IndexStmt; /* ---------------------- diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 3cce3906a0e..7a11c241a83 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -50,6 +50,9 @@ typedef enum /* forward declared, to avoid having to expose buf_internals.h here */ struct WritebackContext; +/* forward declared, to avoid including smgr.h here */ +struct SMgrRelationData; + /* in globals.c ... this duplicates miscadmin.h */ extern PGDLLIMPORT int NBuffers; @@ -190,6 +193,7 @@ extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum); extern void FlushOneBuffer(Buffer buffer); extern void FlushRelationBuffers(Relation rel); +extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels); extern void FlushDatabaseBuffers(Oid dbid); extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, BlockNumber firstDelBlock); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index ff4df7fec51..57863722ce2 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -540,6 +540,9 @@ extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks); extern void LockReleaseSession(LOCKMETHODID lockmethodid); extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks); extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks); +#ifdef USE_ASSERT_CHECKING +extern HTAB *GetLockMethodLocalHash(void); +#endif extern bool LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock); extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag, diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 0298ed1a2bc..af2e96525dd 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -91,6 +91,7 @@ extern void smgrcloseall(void); extern void smgrclosenode(RelFileNodeBackend rnode); extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrdounlink(SMgrRelation reln, bool isRedo); +extern void smgrdosyncall(SMgrRelation *rels, int nrels); extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo); extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrextend(SMgrRelation reln, ForkNumber forknum, diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 11a635964fd..f02d2b561b9 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -66,25 +66,43 @@ typedef struct RelationData /*---------- * rd_createSubid is the ID of the highest subtransaction the rel has - * survived into; or zero if the rel was not created in the current top - * transaction. This can be now be relied on, whereas previously it could - * be "forgotten" in earlier releases. Likewise, rd_newRelfilenodeSubid is - * the ID of the highest subtransaction the relfilenode change has - * survived into, or zero if not changed in the current transaction (or we - * have forgotten changing it). rd_newRelfilenodeSubid can be forgotten - * when a relation has multiple new relfilenodes within a single - * transaction, with one of them occurring in a subsequently aborted - * subtransaction, e.g. + * survived into or zero if the rel or its rd_node was created before the + * current top transaction. (IndexStmt.oldNode leads to the case of a new + * rel with an old rd_node.) rd_firstRelfilenodeSubid is the ID of the + * highest subtransaction an rd_node change has survived into or zero if + * rd_node matches the value it had at the start of the current top + * transaction. (Rolling back the subtransaction that + * rd_firstRelfilenodeSubid denotes would restore rd_node to the value it + * had at the start of the current top transaction. Rolling back any + * lower subtransaction would not.) Their accuracy is critical to + * RelationNeedsWAL(). + * + * rd_newRelfilenodeSubid is the ID of the highest subtransaction the + * most-recent relfilenode change has survived into or zero if not changed + * in the current transaction (or we have forgotten changing it). This + * field is accurate when non-zero, but it can be zero when a relation has + * multiple new relfilenodes within a single transaction, with one of them + * occurring in a subsequently aborted subtransaction, e.g. * BEGIN; * TRUNCATE t; * SAVEPOINT save; * TRUNCATE t; * ROLLBACK TO save; * -- rd_newRelfilenodeSubid is now forgotten + * + * If every rd_*Subid field is zero, they are read-only outside + * relcache.c. Files that trigger rd_node changes by updating + * pg_class.reltablespace and/or pg_class.relfilenode call + * RelationAssumeNewRelfilenode() to update rd_*Subid. + * + * rd_droppedSubid is the ID of the highest subtransaction that a drop of + * the rel has survived into. In entries visible outside relcache.c, this + * is always zero. */ SubTransactionId rd_createSubid; /* rel was created in current xact */ - SubTransactionId rd_newRelfilenodeSubid; /* new relfilenode assigned in - * current xact */ + SubTransactionId rd_newRelfilenodeSubid; /* highest subxact changing + * rd_node to current value */ + /* see end for rd_firstRelfilenodeSubid and rd_droppedSubid */ Form_pg_class rd_rel; /* RELATION tuple */ TupleDesc rd_att; /* tuple descriptor */ @@ -197,6 +215,10 @@ typedef struct RelationData /* placed here to avoid ABI break before v12: */ bool rd_partcheckvalid; /* true if list has been computed */ MemoryContext rd_partcheckcxt; /* private cxt for rd_partcheck, if any */ + + SubTransactionId rd_firstRelfilenodeSubid; /* highest subxact changing + * rd_node to any value */ + SubTransactionId rd_droppedSubid; /* dropped with another Subid set */ } RelationData; @@ -516,9 +538,16 @@ typedef struct ViewOptions /* * RelationNeedsWAL * True if relation needs WAL. - */ -#define RelationNeedsWAL(relation) \ - ((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT) + * + * Returns false if wal_level = minimal and this relation is created or + * truncated in the current transaction. See "Skipping WAL for New + * RelFileNode" in src/backend/access/transam/README. + */ +#define RelationNeedsWAL(relation) \ + ((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT && \ + (XLogIsNeeded() || \ + (relation->rd_createSubid == InvalidSubTransactionId && \ + relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId))) /* * RelationUsesLocalBuffers diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 54394303a85..50005f91667 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -108,10 +108,11 @@ extern Relation RelationBuildLocalRelation(const char *relname, char relkind); /* - * Routine to manage assignment of new relfilenode to a relation + * Routines to manage assignment of new relfilenode to a relation */ extern void RelationSetNewRelfilenode(Relation relation, char persistence, TransactionId freezeXid, MultiXactId minmulti); +extern void RelationAssumeNewRelfilenode(Relation relation); /* * Routines for flushing/rebuilding relcache entries in various scenarios @@ -124,6 +125,11 @@ extern void RelationCacheInvalidate(void); extern void RelationCloseSmgrByOid(Oid relationId); +#ifdef USE_ASSERT_CHECKING +extern void AssertPendingSyncs_RelationCache(void); +#else +#define AssertPendingSyncs_RelationCache() do {} while (0) +#endif extern void AtEOXact_RelationCache(bool isCommit); extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid, SubTransactionId parentSubid); |
