summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorAndres Freund2023-04-05 23:21:09 +0000
committerAndres Freund2023-04-05 23:21:09 +0000
commit31966b151e6ab7a6284deab6e8fe5faddaf2ae4c (patch)
tree76deeba4e15702f9596a6d935a8bd185554b8b45 /src/include
parent8eda7314652703a2ae30d6c4a69c378f6813a7f2 (diff)
bufmgr: Introduce infrastructure for faster relation extension
The primary bottlenecks for relation extension are: 1) The extension lock is held while acquiring a victim buffer for the new page. Acquiring a victim buffer can require writing out the old page contents including possibly needing to flush WAL. 2) When extending via ReadBuffer() et al, we write a zero page during the extension, and then later write out the actual page contents. This can nearly double the write rate. 3) The existing bulk relation extension infrastructure in hio.c just amortized the cost of acquiring the relation extension lock, but none of the other costs. Unfortunately 1) cannot currently be addressed in a central manner as the callers to ReadBuffer() need to acquire the extension lock. To address that, this this commit moves the responsibility for acquiring the extension lock into bufmgr.c functions. That allows to acquire the relation extension lock for just the required time. This will also allow us to improve relation extension further, without changing callers. The reason we write all-zeroes pages during relation extension is that we hope to get ENOSPC errors earlier that way (largely works, except for CoW filesystems). It is easier to handle out-of-space errors gracefully if the page doesn't yet contain actual tuples. This commit addresses 2), by using the recently introduced smgrzeroextend(), which extends the relation, without dirtying the kernel page cache for all the extended pages. To address 3), this commit introduces a function to extend a relation by multiple blocks at a time. There are three new exposed functions: ExtendBufferedRel() for extending the relation by a single block, ExtendBufferedRelBy() to extend a relation by multiple blocks at once, and ExtendBufferedRelTo() for extending a relation up to a certain size. To avoid duplicating code between ReadBuffer(P_NEW) and the new functions, ReadBuffer(P_NEW) now implements relation extension with ExtendBufferedRel(), using a flag to tell ExtendBufferedRel() that the relation lock is already held. Note that this commit does not yet lead to a meaningful performance or scalability improvement - for that uses of ReadBuffer(P_NEW) will need to be converted to ExtendBuffered*(), which will be done in subsequent commits. Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Reviewed-by: Melanie Plageman <melanieplageman@gmail.com> Discussion: https://postgr.es/m/20221029025420.eplyow6k7tgu6he3@awork3.anarazel.de
Diffstat (limited to 'src/include')
-rw-r--r--src/include/pgstat.h1
-rw-r--r--src/include/storage/buf_internals.h7
-rw-r--r--src/include/storage/bufmgr.h65
3 files changed, 73 insertions, 0 deletions
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 75d258d9215..e79b8a34ebc 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -516,6 +516,7 @@ extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *context_ops,
BackendType bktype);
extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op);
+extern void pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt);
extern PgStat_IO *pgstat_fetch_stat_io(void);
extern const char *pgstat_get_io_context_name(IOContext io_context);
extern const char *pgstat_get_io_object_name(IOObject io_object);
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 970d0090615..34feaea9945 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -422,6 +422,13 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
BlockNumber blockNum);
extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum, bool *foundPtr);
+extern BlockNumber ExtendBufferedRelLocal(ExtendBufferedWhat eb,
+ ForkNumber fork,
+ uint32 flags,
+ uint32 extend_by,
+ BlockNumber extend_upto,
+ Buffer *buffers,
+ uint32 *extended_by);
extern void MarkLocalBufferDirty(Buffer buffer);
extern void DropRelationLocalBuffers(RelFileLocator rlocator,
ForkNumber forkNum,
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 537b89e8774..788aa279ba0 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -60,6 +60,53 @@ typedef struct PrefetchBufferResult
bool initiated_io; /* If true, a miss resulting in async I/O */
} PrefetchBufferResult;
+/*
+ * Flags influencing the behaviour of ExtendBufferedRel*
+ */
+typedef enum ExtendBufferedFlags
+{
+ /*
+ * Don't acquire extension lock. This is safe only if the relation isn't
+ * shared, an access exclusive lock is held or if this is the startup
+ * process.
+ */
+ EB_SKIP_EXTENSION_LOCK = (1 << 0),
+
+ /* Is this extension part of recovery? */
+ EB_PERFORMING_RECOVERY = (1 << 1),
+
+ /*
+ * Should the fork be created if it does not currently exist? This likely
+ * only ever makes sense for relation forks.
+ */
+ EB_CREATE_FORK_IF_NEEDED = (1 << 2),
+
+ /* Should the first (possibly only) return buffer be returned locked? */
+ EB_LOCK_FIRST = (1 << 3),
+
+ /* Should the smgr size cache be cleared? */
+ EB_CLEAR_SIZE_CACHE = (1 << 4),
+
+ /* internal flags follow */
+ EB_LOCK_TARGET = (1 << 5),
+} ExtendBufferedFlags;
+
+/*
+ * To identify the relation - either relation or smgr + relpersistence has to
+ * be specified. Used via the EB_REL()/EB_SMGR() macros below. This allows us
+ * to use the same function for both crash recovery and normal operation.
+ */
+typedef struct ExtendBufferedWhat
+{
+ Relation rel;
+ struct SMgrRelationData *smgr;
+ char relpersistence;
+} ExtendBufferedWhat;
+
+#define EB_REL(p_rel) ((ExtendBufferedWhat){.rel = p_rel})
+#define EB_SMGR(p_smgr, p_relpersistence) ((ExtendBufferedWhat){.smgr = p_smgr, .relpersistence = p_relpersistence})
+
+
/* forward declared, to avoid having to expose buf_internals.h here */
struct WritebackContext;
@@ -138,6 +185,24 @@ extern void CheckBufferIsPinnedOnce(Buffer buffer);
extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,
BlockNumber blockNum);
+extern Buffer ExtendBufferedRel(ExtendBufferedWhat eb,
+ ForkNumber forkNum,
+ BufferAccessStrategy strategy,
+ uint32 flags);
+extern BlockNumber ExtendBufferedRelBy(ExtendBufferedWhat eb,
+ ForkNumber fork,
+ BufferAccessStrategy strategy,
+ uint32 flags,
+ uint32 extend_by,
+ Buffer *buffers,
+ uint32 *extended_by);
+extern Buffer ExtendBufferedRelTo(ExtendBufferedWhat eb,
+ ForkNumber fork,
+ BufferAccessStrategy strategy,
+ uint32 flags,
+ BlockNumber extend_to,
+ ReadBufferMode mode);
+
extern void InitBufferPoolAccess(void);
extern void AtEOXact_Buffers(bool isCommit);
extern void PrintBufferLeakWarning(Buffer buffer);