ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
- bool *foundPtr);
-static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+ bool *foundPtr, IOContext *io_context);
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
+ IOObject io_object, IOContext io_context);
static void FindAndDropRelationBuffers(RelFileLocator rlocator,
ForkNumber forkNum,
BlockNumber nForkBlock,
BufferDesc *bufHdr;
Block bufBlock;
bool found;
+ IOContext io_context;
+ IOObject io_object;
bool isExtend;
bool isLocalBuf = SmgrIsTemp(smgr);
if (isLocalBuf)
{
- bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
+ /*
+ * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
+ * do not use a BufferAccessStrategy for I/O of temporary tables.
+ * However, the "strategy" may not be NULL in some cases, such as
+ * CREATE TEMPORARY TABLE AS..., so we can't rely on
+ * IOContextForStrategy() to set the right IOContext for us.
+ */
+ bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
if (found)
pgBufferUsage.local_blks_hit++;
else if (isExtend)
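/*
 * Illustrative sketch, not part of the patch: the case the comment above
 * guards against is a caller that passes a strategy while the target
 * relation is temporary (temp_rel and blkno are hypothetical), roughly:
 *
 *     BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKWRITE);
 *     Buffer buf = ReadBufferExtended(temp_rel, MAIN_FORKNUM, blkno,
 *                                     RBM_NORMAL, strategy);
 *
 * IOContextForStrategy(strategy) would return IOCONTEXT_BULKWRITE, but IO
 * on the temporary relation is still counted in IOCONTEXT_NORMAL.
 */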
* not currently in memory.
*/
bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
- strategy, &found);
+ strategy, &found, &io_context);
if (found)
pgBufferUsage.shared_blks_hit++;
else if (isExtend)
*/
Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
- bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
+ if (isLocalBuf)
+ {
+ bufBlock = LocalBufHdrGetBlock(bufHdr);
+ io_object = IOOBJECT_TEMP_RELATION;
+ }
+ else
+ {
+ bufBlock = BufHdrGetBlock(bufHdr);
+ io_object = IOOBJECT_RELATION;
+ }
if (isExtend)
{
/* don't set checksum for all-zero page */
smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false);
+ pgstat_count_io_op(io_object, io_context, IOOP_EXTEND);
+
/*
* NB: we're *not* doing a ScheduleBufferTagForWriteback here;
* although we're essentially performing a write. At least on linux
smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
+ pgstat_count_io_op(io_object, io_context, IOOP_READ);
+
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
* *foundPtr is actually redundant with the buffer's BM_VALID flag, but
* we keep it for simplicity in ReadBuffer.
*
+ * io_context is passed as an output parameter to avoid calling
+ * IOContextForStrategy() when there is a shared buffers hit and no IO
+ * statistics need be captured.
+ *
* No locks are held either at entry or exit.
*/
static BufferDesc *
BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
- bool *foundPtr)
+ bool *foundPtr, IOContext *io_context)
{
+ bool from_ring;
BufferTag newTag; /* identity of requested block */
uint32 newHash; /* hash value for newTag */
LWLock *newPartitionLock; /* buffer partition lock for it */
{
/*
* If we get here, previous attempts to read the buffer must
- * have failed ... but we shall bravely try again.
+ * have failed ... but we shall bravely try again. Set
+ * io_context since we will in fact need to count an IO
+ * operation.
*/
+ *io_context = IOContextForStrategy(strategy);
*foundPtr = false;
}
}
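/*
 * Hedged summary, for illustration only: *io_context is resolved only on
 * the paths that will go on to count an IO operation:
 *
 *     buffer found and valid              -> *foundPtr = true, io_context unset
 *     found, but the read must be retried -> *io_context = IOContextForStrategy(strategy)
 *     not found, victim buffer needed     -> *io_context = IOContextForStrategy(strategy)
 *
 * so a plain shared-buffer hit skips the strategy lookup entirely.
 */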
*/
LWLockRelease(newPartitionLock);
+ *io_context = IOContextForStrategy(strategy);
+
/* Loop here in case we have to try another victim buffer */
for (;;)
{
* Select a victim buffer. The buffer is returned with its header
* spinlock still held!
*/
- buf = StrategyGetBuffer(strategy, &buf_state);
+ buf = StrategyGetBuffer(strategy, &buf_state, &from_ring);
Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
UnlockBufHdr(buf, buf_state);
if (XLogNeedsFlush(lsn) &&
- StrategyRejectBuffer(strategy, buf))
+ StrategyRejectBuffer(strategy, buf, from_ring))
{
/* Drop lock/pin and loop around for another buffer */
LWLockRelease(BufferDescriptorGetContentLock(buf));
smgr->smgr_rlocator.locator.dbOid,
smgr->smgr_rlocator.locator.relNumber);
- FlushBuffer(buf, NULL);
+ FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
LWLockRelease(BufferDescriptorGetContentLock(buf));
ScheduleBufferTagForWriteback(&BackendWritebackContext,
LWLockRelease(newPartitionLock);
+ if (oldFlags & BM_VALID)
+ {
+ /*
+ * When a BufferAccessStrategy is in use, blocks evicted from shared
+ * buffers are counted as IOOP_EVICT in the corresponding context
+ * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
+ * strategy in two cases: 1) while initially claiming buffers for the
+ * strategy ring, and 2) to replace an existing strategy ring buffer
+ * because it is pinned or in use and cannot be reused.
+ *
+ * Blocks evicted from buffers already in the strategy ring are
+ * counted as IOOP_REUSE in the corresponding strategy context.
+ *
+ * At this point, we can accurately count evictions and reuses,
+ * because we have successfully claimed the valid buffer. Previously,
+ * we may have been forced to release the buffer due to concurrent
+ * pinners or erroring out.
+ */
+ pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
+ from_ring ? IOOP_REUSE : IOOP_EVICT);
+ }
+
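/*
 * Worked example, for illustration only: a VACUUM using a BAS_VACUUM ring
 * counts the initial claiming of each victim buffer that held valid data
 * as IOCONTEXT_VACUUM IOOP_EVICT (from_ring is false), while victims taken
 * from buffers already in the ring are counted as IOCONTEXT_VACUUM
 * IOOP_REUSE. With no strategy, *io_context is IOCONTEXT_NORMAL and any
 * valid victim is counted as an IOOP_EVICT in that context.
 */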
/*
* Buffer contents are currently invalid. Try to obtain the right to
* start I/O. If StartBufferIO returns false, then someone else managed
PinBuffer_Locked(bufHdr);
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, NULL);
+ FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
* as the second parameter. If not, pass NULL.
*/
static void
-FlushBuffer(BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
+ IOContext io_context)
{
XLogRecPtr recptr;
ErrorContextCallback errcallback;
bufToWrite,
false);
+ /*
+ * When a strategy is in use, only flushes of dirty buffers already in the
+ * strategy ring are counted as strategy writes (IOOP_WRITE in
+ * IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE, or IOCONTEXT_VACUUM) for the
+ * purpose of IO statistics tracking.
+ *
+ * If a shared buffer initially added to the ring must be flushed before
+ * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
+ *
+ * If a shared buffer which was added to the ring later because the
+ * current strategy buffer is pinned or in use or because all strategy
+ * buffers were dirty and rejected (for BAS_BULKREAD operations only)
+ * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
+ * (from_ring will be false).
+ *
+ * When a strategy is not in use, the write can only be a "regular" write
+ * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
+ */
+ pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_WRITE);
+
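/*
 * Worked example, for illustration only: COPY FROM uses a BAS_BULKWRITE
 * ring, so flushing a dirty buffer that is already a ring member is
 * counted as IOCONTEXT_BULKWRITE IOOP_WRITE, whereas flushing a dirty
 * shared buffer while it is first being added to the ring is counted as
 * IOCONTEXT_NORMAL IOOP_WRITE, per the rules above.
 */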
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
+ pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
+
/* Pop the error context stack */
error_context_stack = errcallback.previous;
}
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, RelationGetSmgr(rel));
+ FlushBuffer(bufHdr, RelationGetSmgr(rel), IOOBJECT_RELATION, IOCONTEXT_NORMAL);
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr);
}
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, srelent->srel);
+ FlushBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr);
}
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, NULL);
+ FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr);
}
Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
- FlushBuffer(bufHdr, NULL);
+ FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
}
/*
*/
#include "postgres.h"
+#include "pgstat.h"
#include "port/atomics.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
*/
int current;
- /*
- * True if the buffer just returned by StrategyGetBuffer had been in the
- * ring already.
- */
- bool current_was_in_ring;
-
/*
* Array of buffer numbers. InvalidBuffer (that is, zero) indicates we
* have not yet selected a buffer for this ring slot. For allocation
* return the buffer with the buffer header spinlock still held.
*/
BufferDesc *
-StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
+StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
{
BufferDesc *buf;
int bgwprocno;
int trycounter;
uint32 local_buf_state; /* to avoid repeated (de-)referencing */
+ *from_ring = false;
+
/*
* If given a strategy object, see whether it can select a buffer. We
* assume strategy objects don't need buffer_strategy_lock.
{
buf = GetBufferFromRing(strategy, buf_state);
if (buf != NULL)
+ {
+ *from_ring = true;
return buf;
+ }
}
/*
/*
* GetBufferFromRing -- returns a buffer from the ring, or NULL if the
- * ring is empty.
+ * ring is empty / not usable.
*
* The bufhdr spin lock is held on the returned buffer.
*/
*/
bufnum = strategy->buffers[strategy->current];
if (bufnum == InvalidBuffer)
- {
- strategy->current_was_in_ring = false;
return NULL;
- }
/*
* If the buffer is pinned we cannot use it under any circumstances.
if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
&& BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
{
- strategy->current_was_in_ring = true;
*buf_state = local_buf_state;
return buf;
}
* Tell caller to allocate a new buffer with the normal allocation
* strategy. He'll then replace this ring element via AddBufferToRing.
*/
- strategy->current_was_in_ring = false;
return NULL;
}
strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
}
+/*
+ * Utility function returning the IOContext to use for IO done with a given
+ * BufferAccessStrategy.
+ */
+IOContext
+IOContextForStrategy(BufferAccessStrategy strategy)
+{
+ if (!strategy)
+ return IOCONTEXT_NORMAL;
+
+ switch (strategy->btype)
+ {
+ case BAS_NORMAL:
+
+ /*
+ * Currently, GetAccessStrategy() returns NULL for
+ * BufferAccessStrategyType BAS_NORMAL, so this case is
+ * unreachable.
+ */
+ pg_unreachable();
+ return IOCONTEXT_NORMAL;
+ case BAS_BULKREAD:
+ return IOCONTEXT_BULKREAD;
+ case BAS_BULKWRITE:
+ return IOCONTEXT_BULKWRITE;
+ case BAS_VACUUM:
+ return IOCONTEXT_VACUUM;
+ }
+
+ elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
+ pg_unreachable();
+}
+
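/*
 * Usage sketch, for illustration only: callers resolve the context once,
 * just before counting an IO operation, e.g.
 *
 *     IOContext io_context = IOContextForStrategy(strategy);
 *
 *     pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_READ);
 *
 * A NULL strategy yields IOCONTEXT_NORMAL; BAS_NORMAL itself is never seen
 * here because GetAccessStrategy(BAS_NORMAL) currently returns NULL.
 */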
/*
* StrategyRejectBuffer -- consider rejecting a dirty buffer
*
* if this buffer should be written and re-used.
*/
bool
-StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf)
+StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
{
/* We only do this in bulkread mode */
if (strategy->btype != BAS_BULKREAD)
return false;
/* Don't muck with behavior of normal buffer-replacement strategy */
- if (!strategy->current_was_in_ring ||
+ if (!from_ring ||
strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
return false;