summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/pg_buffercache/pg_buffercache_pages.c6
-rw-r--r--src/backend/storage/buffer/buf_init.c21
-rw-r--r--src/backend/storage/buffer/bufmgr.c66
-rw-r--r--src/backend/storage/buffer/freelist.c6
-rw-r--r--src/backend/storage/buffer/localbuf.c12
-rw-r--r--src/include/c.h1
-rw-r--r--src/include/storage/buf_internals.h35
7 files changed, 93 insertions, 54 deletions
diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index d00f9859088..98016fc365e 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -73,7 +73,6 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
if (SRF_IS_FIRSTCALL())
{
int i;
- volatile BufferDesc *bufHdr;
funcctx = SRF_FIRSTCALL_INIT();
@@ -146,8 +145,11 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
* Scan though all the buffers, saving the relevant fields in the
* fctx->record structure.
*/
- for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
+ for (i = 0; i < NBuffers; i++)
{
+ volatile BufferDesc *bufHdr;
+
+ bufHdr = GetBufferDescriptor(i);
/* Lock each buffer header before inspecting. */
LockBufHdr(bufHdr);
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 4434bc36f18..0cd2530be99 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -18,7 +18,7 @@
#include "storage/buf_internals.h"
-BufferDesc *BufferDescriptors;
+BufferDescPadded *BufferDescriptors;
char *BufferBlocks;
@@ -67,9 +67,11 @@ InitBufferPool(void)
bool foundBufs,
foundDescs;
- BufferDescriptors = (BufferDesc *)
+ /* Align descriptors to a cacheline boundary. */
+ BufferDescriptors = (BufferDescPadded *) CACHELINEALIGN(
ShmemInitStruct("Buffer Descriptors",
- NBuffers * sizeof(BufferDesc), &foundDescs);
+ NBuffers * sizeof(BufferDescPadded) + PG_CACHE_LINE_SIZE,
+ &foundDescs));
BufferBlocks = (char *)
ShmemInitStruct("Buffer Blocks",
@@ -83,16 +85,15 @@ InitBufferPool(void)
}
else
{
- BufferDesc *buf;
int i;
- buf = BufferDescriptors;
-
/*
* Initialize all the buffer headers.
*/
- for (i = 0; i < NBuffers; buf++, i++)
+ for (i = 0; i < NBuffers; i++)
{
+ BufferDesc *buf = GetBufferDescriptor(i);
+
CLEAR_BUFFERTAG(buf->tag);
buf->flags = 0;
buf->usage_count = 0;
@@ -114,7 +115,7 @@ InitBufferPool(void)
}
/* Correct last entry of linked list */
- BufferDescriptors[NBuffers - 1].freeNext = FREENEXT_END_OF_LIST;
+ GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
}
/* Init other shared buffer-management stuff */
@@ -133,7 +134,9 @@ BufferShmemSize(void)
Size size = 0;
/* size of buffer descriptors */
- size = add_size(size, mul_size(NBuffers, sizeof(BufferDesc)));
+ size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
+ /* to allow aligning buffer descriptors */
+ size = add_size(size, PG_CACHE_LINE_SIZE);
/* size of data pages */
size = add_size(size, mul_size(NBuffers, BLCKSZ));
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7430407788b..e1e6240fe3e 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -898,7 +898,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* buffer pool, and check to see if the correct data has been loaded
* into the buffer.
*/
- buf = &BufferDescriptors[buf_id];
+ buf = GetBufferDescriptor(buf_id);
valid = PinBuffer(buf, strategy);
@@ -1105,7 +1105,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
/* remaining code should match code at top of routine */
- buf = &BufferDescriptors[buf_id];
+ buf = GetBufferDescriptor(buf_id);
valid = PinBuffer(buf, strategy);
@@ -1328,7 +1328,7 @@ MarkBufferDirty(Buffer buffer)
return;
}
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
Assert(BufferIsPinned(buffer));
/* unfortunately we can't check if the lock is held exclusively */
@@ -1380,7 +1380,7 @@ ReleaseAndReadBuffer(Buffer buffer,
Assert(BufferIsPinned(buffer));
if (BufferIsLocal(buffer))
{
- bufHdr = &LocalBufferDescriptors[-buffer - 1];
+ bufHdr = GetLocalBufferDescriptor(-buffer - 1);
if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
bufHdr->tag.forkNum == forkNum)
@@ -1390,7 +1390,7 @@ ReleaseAndReadBuffer(Buffer buffer,
}
else
{
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
/* we have pin, so it's ok to examine tag without spinlock */
if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
@@ -1609,7 +1609,7 @@ BufferSync(int flags)
num_to_write = 0;
for (buf_id = 0; buf_id < NBuffers; buf_id++)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
/*
* Header spinlock is enough to examine BM_DIRTY, see comment in
@@ -1644,7 +1644,7 @@ BufferSync(int flags)
num_written = 0;
while (num_to_scan-- > 0)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
/*
* We don't need to acquire the lock here, because we're only looking
@@ -2016,7 +2016,7 @@ BgBufferSync(void)
static int
SyncOneBuffer(int buf_id, bool skip_recently_used)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
int result = 0;
ReservePrivateRefCountEntry();
@@ -2196,13 +2196,13 @@ PrintBufferLeakWarning(Buffer buffer)
Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer))
{
- buf = &LocalBufferDescriptors[-buffer - 1];
+ buf = GetLocalBufferDescriptor(-buffer - 1);
loccount = LocalRefCount[-buffer - 1];
backend = MyBackendId;
}
else
{
- buf = &BufferDescriptors[buffer - 1];
+ buf = GetBufferDescriptor(buffer - 1);
loccount = GetPrivateRefCount(buffer);
backend = InvalidBackendId;
}
@@ -2265,9 +2265,9 @@ BufferGetBlockNumber(Buffer buffer)
Assert(BufferIsPinned(buffer));
if (BufferIsLocal(buffer))
- bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
+ bufHdr = GetLocalBufferDescriptor(-buffer - 1);
else
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
/* pinned, so OK to read tag without spinlock */
return bufHdr->tag.blockNum;
@@ -2288,9 +2288,9 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
Assert(BufferIsPinned(buffer));
if (BufferIsLocal(buffer))
- bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
+ bufHdr = GetLocalBufferDescriptor(-buffer - 1);
else
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
/* pinned, so OK to read tag without spinlock */
*rnode = bufHdr->tag.rnode;
@@ -2473,7 +2473,7 @@ BufferIsPermanent(Buffer buffer)
* changing an aligned 2-byte BufFlags value is atomic, so we'll read the
* old value or the new value, but not random garbage.
*/
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
return (bufHdr->flags & BM_PERMANENT) != 0;
}
@@ -2486,7 +2486,7 @@ BufferIsPermanent(Buffer buffer)
XLogRecPtr
BufferGetLSNAtomic(Buffer buffer)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[buffer - 1];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
char *page = BufferGetPage(buffer);
XLogRecPtr lsn;
@@ -2549,7 +2549,7 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
for (i = 0; i < NBuffers; i++)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
/*
* We can make this a tad faster by prechecking the buffer tag before
@@ -2639,7 +2639,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
for (i = 0; i < NBuffers; i++)
{
RelFileNode *rnode = NULL;
- volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2703,7 +2703,7 @@ DropDatabaseBuffers(Oid dbid)
for (i = 0; i < NBuffers; i++)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2732,10 +2732,11 @@ void
PrintBufferDescs(void)
{
int i;
- volatile BufferDesc *buf = BufferDescriptors;
- for (i = 0; i < NBuffers; ++i, ++buf)
+ for (i = 0; i < NBuffers; ++i)
{
+ volatile BufferDesc *buf = GetBufferDescriptor(i);
+
/* theoretically we should lock the bufhdr here */
elog(LOG,
"[%02d] (freeNext=%d, rel=%s, "
@@ -2753,10 +2754,11 @@ void
PrintPinnedBufs(void)
{
int i;
- volatile BufferDesc *buf = BufferDescriptors;
- for (i = 0; i < NBuffers; ++i, ++buf)
+ for (i = 0; i < NBuffers; ++i)
{
+ volatile BufferDesc *buf = GetBufferDescriptor(i);
+
if (GetPrivateRefCount(i + 1) > 0)
{
/* theoretically we should lock the bufhdr here */
@@ -2804,7 +2806,7 @@ FlushRelationBuffers(Relation rel)
{
for (i = 0; i < NLocBuffer; i++)
{
- bufHdr = &LocalBufferDescriptors[i];
+ bufHdr = GetLocalBufferDescriptor(i);
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
@@ -2842,7 +2844,7 @@ FlushRelationBuffers(Relation rel)
for (i = 0; i < NBuffers; i++)
{
- bufHdr = &BufferDescriptors[i];
+ bufHdr = GetBufferDescriptor(i);
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2894,7 +2896,7 @@ FlushDatabaseBuffers(Oid dbid)
for (i = 0; i < NBuffers; i++)
{
- bufHdr = &BufferDescriptors[i];
+ bufHdr = GetBufferDescriptor(i);
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2938,7 +2940,7 @@ ReleaseBuffer(Buffer buffer)
return;
}
- UnpinBuffer(&BufferDescriptors[buffer - 1], true);
+ UnpinBuffer(GetBufferDescriptor(buffer - 1), true);
}
/*
@@ -3007,7 +3009,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
return;
}
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
Assert(GetPrivateRefCount(buffer) > 0);
/* here, either share or exclusive lock is OK */
@@ -3161,7 +3163,7 @@ LockBuffer(Buffer buffer, int mode)
if (BufferIsLocal(buffer))
return; /* local buffers need no lock */
- buf = &(BufferDescriptors[buffer - 1]);
+ buf = GetBufferDescriptor(buffer - 1);
if (mode == BUFFER_LOCK_UNLOCK)
LWLockRelease(buf->content_lock);
@@ -3187,7 +3189,7 @@ ConditionalLockBuffer(Buffer buffer)
if (BufferIsLocal(buffer))
return true; /* act as though we got it */
- buf = &(BufferDescriptors[buffer - 1]);
+ buf = GetBufferDescriptor(buffer - 1);
return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE);
}
@@ -3231,7 +3233,7 @@ LockBufferForCleanup(Buffer buffer)
elog(ERROR, "incorrect local pin count: %d",
GetPrivateRefCount(buffer));
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
for (;;)
{
@@ -3332,7 +3334,7 @@ ConditionalLockBufferForCleanup(Buffer buffer)
if (!ConditionalLockBuffer(buffer))
return false;
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
LockBufHdr(bufHdr);
Assert(bufHdr->refcount > 0);
if (bufHdr->refcount == 1)
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 3add619b5da..0d1cbd15531 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -259,7 +259,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
break;
}
- buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
+ buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);
/* Unconditionally remove buffer from freelist */
@@ -296,7 +296,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
for (;;)
{
- buf = &BufferDescriptors[ClockSweepTick()];
+ buf = GetBufferDescriptor(ClockSweepTick());
/*
* If the buffer is pinned or has a nonzero usage_count, we cannot use
@@ -614,7 +614,7 @@ GetBufferFromRing(BufferAccessStrategy strategy)
* higher usage_count indicates someone else has touched the buffer, so we
* shouldn't re-use it.
*/
- buf = &BufferDescriptors[bufnum - 1];
+ buf = GetBufferDescriptor(bufnum - 1);
LockBufHdr(buf);
if (buf->refcount == 0 && buf->usage_count <= 1)
{
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 1fc0af386cb..3144afe37bc 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -122,7 +122,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
if (hresult)
{
b = hresult->id;
- bufHdr = &LocalBufferDescriptors[b];
+ bufHdr = GetLocalBufferDescriptor(b);
Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag));
#ifdef LBDEBUG
fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
@@ -165,7 +165,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
if (++nextFreeLocalBuf >= NLocBuffer)
nextFreeLocalBuf = 0;
- bufHdr = &LocalBufferDescriptors[b];
+ bufHdr = GetLocalBufferDescriptor(b);
if (LocalRefCount[b] == 0)
{
@@ -278,7 +278,7 @@ MarkLocalBufferDirty(Buffer buffer)
Assert(LocalRefCount[bufid] > 0);
- bufHdr = &LocalBufferDescriptors[bufid];
+ bufHdr = GetLocalBufferDescriptor(bufid);
if (!(bufHdr->flags & BM_DIRTY))
pgBufferUsage.local_blks_dirtied++;
@@ -305,7 +305,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
for (i = 0; i < NLocBuffer; i++)
{
- BufferDesc *bufHdr = &LocalBufferDescriptors[i];
+ BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
if ((bufHdr->flags & BM_TAG_VALID) &&
@@ -347,7 +347,7 @@ DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
for (i = 0; i < NLocBuffer; i++)
{
- BufferDesc *bufHdr = &LocalBufferDescriptors[i];
+ BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
if ((bufHdr->flags & BM_TAG_VALID) &&
@@ -400,7 +400,7 @@ InitLocalBuffers(void)
/* initialize fields that need to start off nonzero */
for (i = 0; i < nbufs; i++)
{
- BufferDesc *buf = &LocalBufferDescriptors[i];
+ BufferDesc *buf = GetLocalBufferDescriptor(i);
/*
* negative to indicate local buffer. This is tricky: shared buffers
diff --git a/src/include/c.h b/src/include/c.h
index c929d3d6aec..b1875209736 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -537,6 +537,7 @@ typedef NameData *Name;
#define MAXALIGN(LEN) TYPEALIGN(MAXIMUM_ALIGNOF, (LEN))
/* MAXALIGN covers only built-in types, not buffers */
#define BUFFERALIGN(LEN) TYPEALIGN(ALIGNOF_BUFFER, (LEN))
+#define CACHELINEALIGN(LEN) TYPEALIGN(PG_CACHE_LINE_SIZE, (LEN))
#define TYPEALIGN_DOWN(ALIGNVAL,LEN) \
(((uintptr_t) (LEN)) & ~((uintptr_t) ((ALIGNVAL) - 1)))
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 9b8ace54da8..521ee1cfaea 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -134,7 +134,7 @@ typedef struct buftag
* We use this same struct for local buffer headers, but the lock fields
* are not used and not all of the flag bits are useful either.
*/
-typedef struct sbufdesc
+typedef struct BufferDesc
{
BufferTag tag; /* ID of page contained in buffer */
BufFlags flags; /* see bit definitions above */
@@ -151,6 +151,37 @@ typedef struct sbufdesc
LWLock *content_lock; /* to lock access to buffer contents */
} BufferDesc;
+/*
+ * Concurrent access to buffer headers has proven to be more efficient if
+ * they're cache line aligned. So we force the start of the BufferDescriptors
+ * array to be on a cache line boundary and force the elements to be cache
+ * line sized.
+ *
+ * XXX: As this primarily matters in highly concurrent workloads which
+ * probably all are 64bit these days, and the space wastage would be a bit
+ * more noticeable on 32bit systems, we don't force the stride to be cache
+ * line sized on those. If somebody does actual performance testing, we can
+ * reevaluate.
+ *
+ * Note that local buffer descriptors aren't forced to be aligned - as there's
+ * no concurrent access to those it's unlikely to be beneficial.
+ *
+ * We use 64bit as the cache line size here, because that's the most common
+ * size. Making it bigger would be a waste of memory. Even if running on a
+ * platform with either 32 or 128 byte line sizes, it's good to align to
+ * boundaries and avoid false sharing.
+ */
+#define BUFFERDESC_PAD_TO_SIZE (SIZEOF_VOID_P == 8 ? 64 : 1)
+
+typedef union BufferDescPadded
+{
+ BufferDesc bufferdesc;
+ char pad[BUFFERDESC_PAD_TO_SIZE];
+} BufferDescPadded;
+
+#define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc)
+#define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)])
+
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
/*
@@ -174,7 +205,7 @@ typedef struct sbufdesc
/* in buf_init.c */
-extern PGDLLIMPORT BufferDesc *BufferDescriptors;
+extern PGDLLIMPORT BufferDescPadded *BufferDescriptors;
/* in localbuf.c */
extern BufferDesc *LocalBufferDescriptors;