to write out data that we are about to tell the filesystem to drop.
smgr_internal_unlink already had a DropRelFileNodeBuffers call to
discard dead buffers without writing them once it is no longer possible
to roll back the deleting transaction. Adding a similar call in
smgrtruncate simplifies callers and makes the overall division of
labor clearer. This patch also removes the former behavior in which
VACUUM unconditionally wrote all dirty buffers of a relation.
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.124 2004/12/31 21:59:22 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.125 2005/03/20 22:00:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
{
/*
* Okay to truncate.
- *
- * First, flush any shared buffers for the blocks we intend to
- * delete. FlushRelationBuffers is a bit more than we need
- * for this, since it will also write out dirty buffers for
- * blocks we aren't deleting, but it's the closest thing in
- * bufmgr's API.
- */
- FlushRelationBuffers(rel, new_pages);
-
- /*
- * Do the physical truncation.
*/
RelationTruncate(rel, new_pages);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.280 2005/01/27 03:17:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.281 2005/03/20 22:00:51 tgl Exp $
*
*
* INTERFACE ROUTINES
*/
rel = relation_open(relid, AccessExclusiveLock);
- /*
- * Release all buffers that belong to this relation, after writing any
- * that are dirty
- */
- FlushRelationBuffers(rel, (BlockNumber) 0);
-
/*
* Schedule unlinking of the relation's physical file at commit.
*/
/* Fetch info needed for index_build */
indexInfo = BuildIndexInfo(currentIndex);
- /*
- * Drop any buffers associated with this index. If they're dirty,
- * they're just dropped without bothering to flush to disk.
- */
- DropRelationBuffers(currentIndex);
-
- /* Now truncate the actual data */
+ /* Now truncate the actual file (and discard buffers) */
RelationTruncate(currentIndex, 0);
/* Initialize the index and rebuild */
{
Relation rel = lfirst(cell);
- /*
- * Release any buffers associated with this relation. If they're
- * dirty, they're just dropped without bothering to flush to disk.
- */
- DropRelationBuffers(rel);
-
- /* Now truncate the actual data */
+ /* Truncate the actual file (and discard buffers) */
RelationTruncate(rel, 0);
/* If this relation has indexes, truncate the indexes too */
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.247 2005/03/16 21:38:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.248 2005/03/20 22:00:51 tgl Exp $
*
*
* INTERFACE ROUTINES
LockRelation(userIndexRelation, AccessExclusiveLock);
/*
- * flush buffer cache and schedule physical removal of the file
+ * Schedule physical removal of the file
*/
- FlushRelationBuffers(userIndexRelation, (BlockNumber) 0);
-
RelationOpenSmgr(userIndexRelation);
smgrscheduleunlink(userIndexRelation->rd_smgr,
userIndexRelation->rd_istemp);
if (inplace)
{
- /*
- * Release any buffers associated with this index. If they're
- * dirty, they're just dropped without bothering to flush to
- * disk.
- */
- DropRelationBuffers(iRel);
-
- /* Now truncate the actual data */
+ /* Truncate the actual file (and discard buffers) */
RelationTruncate(iRel, 0);
}
else
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.132 2005/02/06 20:19:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.133 2005/03/20 22:00:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
void
swap_relation_files(Oid r1, Oid r2)
{
- Relation relRelation,
- rel;
+ Relation relRelation;
HeapTuple reltup1,
reltup2;
Form_pg_class relform1,
elog(ERROR, "cache lookup failed for relation %u", r2);
relform2 = (Form_pg_class) GETSTRUCT(reltup2);
- /*
- * The buffer manager gets confused if we swap relfilenodes for
- * relations that are not both local or non-local to this transaction.
- * Flush the buffers on both relations so the buffer manager can
- * forget about'em. (XXX this might not be necessary anymore?)
- */
- rel = relation_open(r1, NoLock);
- FlushRelationBuffers(rel, 0);
- relation_close(rel, NoLock);
-
- rel = relation_open(r2, NoLock);
- FlushRelationBuffers(rel, 0);
- relation_close(rel, NoLock);
-
/*
* Actually swap the fields in the two tuples
*/
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.147 2005/03/16 21:38:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.148 2005/03/20 22:00:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
Page page = (Page) buf;
/*
- * Since we copy the data directly without looking at the shared
+ * Since we copy the file directly without looking at the shared
* buffers, we'd better first flush out any pages of the source
- * relation that are in shared buffers. We assume no new pages will
- * get loaded into buffers while we are holding exclusive lock on the
- * rel.
+ * relation that are in shared buffers. We assume no new changes
+ * will be made while we are holding exclusive lock on the rel.
*/
- FlushRelationBuffers(rel, 0);
+ FlushRelationBuffers(rel);
/*
* We need to log the copied data in WAL iff WAL archiving is enabled
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.304 2005/03/16 21:38:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.305 2005/03/20 22:00:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* Clean pages from vacuum_pages list */
vacuum_heap(vacrelstats, onerel, &vacuum_pages);
}
- else
- {
- /*
- * Flush dirty pages out to disk. We must do this even if we
- * didn't do anything else, because we want to ensure that all
- * tuples have correct on-row commit status on disk (see
- * bufmgr.c's comments for FlushRelationBuffers()).
- */
- FlushRelationBuffers(onerel, vacrelstats->rel_pages);
- }
}
/* update shared free space map with final free space info */
pfree(Nvacpagelist.pagedesc);
}
- /*
- * Flush dirty pages out to disk. We do this unconditionally, even if
- * we don't need to truncate, because we want to ensure that all
- * tuples have correct on-row commit status on disk (see bufmgr.c's
- * comments for FlushRelationBuffers()).
- */
- FlushRelationBuffers(onerel, blkno);
-
- /* truncate relation, if needed */
+ /* Truncate relation, if needed */
if (blkno < nblocks)
{
RelationTruncate(onerel, blkno);
}
}
- /*
- * Flush dirty pages out to disk. We do this unconditionally, even if
- * we don't need to truncate, because we want to ensure that all
- * tuples have correct on-row commit status on disk (see bufmgr.c's
- * comments for FlushRelationBuffers()).
- */
+ /* Truncate relation if there are some empty end-pages */
Assert(vacrelstats->rel_pages >= vacuum_pages->empty_end_pages);
- relblocks = vacrelstats->rel_pages - vacuum_pages->empty_end_pages;
-
- FlushRelationBuffers(onerel, relblocks);
-
- /* truncate relation if there are some empty end-pages */
if (vacuum_pages->empty_end_pages > 0)
{
+ relblocks = vacrelstats->rel_pages - vacuum_pages->empty_end_pages;
ereport(elevel,
(errmsg("\"%s\": truncated %u to %u pages",
RelationGetRelationName(onerel),
vacrelstats->rel_pages, relblocks)));
RelationTruncate(onerel, relblocks);
- vacrelstats->rel_pages = relblocks; /* set new number of
- * blocks */
+ vacrelstats->rel_pages = relblocks; /* set new number of blocks */
}
}
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.50 2004/12/31 21:59:42 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.51 2005/03/20 22:00:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* Okay to truncate.
- *
- * First, flush any shared buffers for the blocks we intend to delete.
- * FlushRelationBuffers is a bit more than we need for this, since it
- * will also write out dirty buffers for blocks we aren't deleting,
- * but it's the closest thing in bufmgr's API.
- */
- FlushRelationBuffers(onerel, new_rel_pages);
-
- /*
- * Do the physical truncation.
*/
RelationTruncate(onerel, new_rel_pages);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.187 2005/03/18 05:25:23 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.188 2005/03/20 22:00:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* RelationTruncate
* Physically truncate a relation to the specified number of blocks.
*
- * Caller should already have done something to flush any buffered pages
- * that are to be dropped.
+ * As of Postgres 8.1, this includes getting rid of any buffers for the
+ * blocks that are to be dropped; previously, callers had to do that.
*/
void
RelationTruncate(Relation rel, BlockNumber nblocks)
smgrtruncate(rel->rd_smgr, nblocks, rel->rd_istemp);
}
-/* ---------------------------------------------------------------------
- * DropRelationBuffers
- *
- * This function removes all the buffered pages for a relation
- * from the buffer pool. Dirty pages are simply dropped, without
- * bothering to write them out first. This is NOT rollback-able,
- * and so should be used only with extreme caution!
- *
- * There is no particularly good reason why this doesn't have a
- * firstDelBlock parameter, except that current callers don't need it.
- *
- * We assume that the caller holds an exclusive lock on the relation,
- * which should assure that no new buffers will be acquired for the rel
- * meanwhile.
- * --------------------------------------------------------------------
- */
-void
-DropRelationBuffers(Relation rel)
-{
- DropRelFileNodeBuffers(rel->rd_node, rel->rd_istemp, 0);
-}
-
/* ---------------------------------------------------------------------
* DropRelFileNodeBuffers
*
- * This is the same as DropRelationBuffers, except that the target
- * relation is specified by RelFileNode and temp status, and one
- * may specify the first block to drop.
+ * This function removes from the buffer pool all the pages of the
+ * specified relation that have block numbers >= firstDelBlock.
+ * (In particular, with firstDelBlock = 0, all pages are removed.)
+ * Dirty pages are simply dropped, without bothering to write them
+ * out first. Therefore, this is NOT rollback-able, and so should be
+ * used only with extreme caution!
+ *
+ * Currently, this is called only from smgr.c when the underlying file
+ * is about to be deleted or truncated (firstDelBlock is needed for
+ * the truncation case). The data in the affected pages would therefore
+ * be deleted momentarily anyway, and there is no point in writing it.
+ * It is the responsibility of higher-level code to ensure that the
+ * deletion or truncation does not lose any data that could be needed
+ * later. It is also the responsibility of higher-level code to ensure
+ * that no other process could be trying to load more pages of the
+ * relation into buffers.
*
- * This is NOT rollback-able. One legitimate use is to clear the
- * buffer cache of buffers for a relation that is being deleted
- * during transaction abort.
+ * XXX currently it sequentially searches the buffer pool, should be
+ * changed to more clever ways of searching. However, this routine
+ * is used only in code paths that aren't very performance-critical,
+ * and we shouldn't slow down the hot paths to make it faster ...
* --------------------------------------------------------------------
*/
void
* bothering to write them out first. This is used when we destroy a
* database, to avoid trying to flush data to disk when the directory
* tree no longer exists. Implementation is pretty similar to
- * DropRelationBuffers() which is for destroying just one relation.
+ * DropRelFileNodeBuffers() which is for destroying just one relation.
* --------------------------------------------------------------------
*/
void
/* ---------------------------------------------------------------------
* FlushRelationBuffers
*
- * This function writes all dirty pages of a relation out to disk.
- * Furthermore, pages that have blocknumber >= firstDelBlock are
- * actually removed from the buffer pool.
- *
- * This is called by DROP TABLE to clear buffers for the relation
- * from the buffer pool. Note that we must write dirty buffers,
- * rather than just dropping the changes, because our transaction
- * might abort later on; we want to roll back safely in that case.
- *
- * This is also called by VACUUM before truncating the relation to the
- * given number of blocks. It might seem unnecessary for VACUUM to
- * write dirty pages before firstDelBlock, since VACUUM should already
- * have committed its changes. However, it is possible for there still
- * to be dirty pages: if some page had unwritten on-row tuple status
- * updates from a prior transaction, and VACUUM had no additional
- * changes to make to that page, then VACUUM won't have written it.
- * This is harmless in most cases but will break pg_upgrade, which
- * relies on VACUUM to ensure that *all* tuples have correct on-row
- * status. So, we check and flush all dirty pages of the rel
- * regardless of block number.
- *
- * In all cases, the caller should be holding AccessExclusiveLock on
- * the target relation to ensure that no other backend is busy reading
- * more blocks of the relation (or might do so before we commit).
- * This should also ensure that no one is busy dirtying these blocks.
- *
- * Formerly, we considered it an error condition if we found dirty
- * buffers here. However, since BufferSync no longer forces out all
- * dirty buffers at every xact commit, it's possible for dirty buffers
- * to still be present in the cache due to failure of an earlier
- * transaction. So, must flush dirty buffers without complaint.
+ * This function writes all dirty pages of a relation out to disk
+ * (or more accurately, out to kernel disk buffers), ensuring that the
+ * kernel has an up-to-date view of the relation.
+ *
+ * Generally, the caller should be holding AccessExclusiveLock on the
+ * target relation to ensure that no other backend is busy dirtying
+ * more blocks of the relation; the effects can't be expected to last
+ * after the lock is released.
*
* XXX currently it sequentially searches the buffer pool, should be
- * changed to more clever ways of searching.
+ * changed to more clever ways of searching. This routine is not
+ * used in any performance-critical code paths, so it's not worth
+ * adding additional overhead to normal paths to make it go faster;
+ * but see also DropRelFileNodeBuffers.
* --------------------------------------------------------------------
*/
void
-FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
+FlushRelationBuffers(Relation rel)
{
int i;
BufferDesc *bufHdr;
for (i = 0; i < NLocBuffer; i++)
{
bufHdr = &LocalBufferDescriptors[i];
- if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
+ if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
+ (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
- if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
- {
- ErrorContextCallback errcontext;
+ ErrorContextCallback errcontext;
- /* Setup error traceback support for ereport() */
- errcontext.callback = buffer_write_error_callback;
- errcontext.arg = bufHdr;
- errcontext.previous = error_context_stack;
- error_context_stack = &errcontext;
+ /* Setup error traceback support for ereport() */
+ errcontext.callback = buffer_write_error_callback;
+ errcontext.arg = bufHdr;
+ errcontext.previous = error_context_stack;
+ error_context_stack = &errcontext;
- smgrwrite(rel->rd_smgr,
- bufHdr->tag.blockNum,
- (char *) LocalBufHdrGetBlock(bufHdr),
- true);
+ smgrwrite(rel->rd_smgr,
+ bufHdr->tag.blockNum,
+ (char *) LocalBufHdrGetBlock(bufHdr),
+ true);
- bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
+ bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
- /* Pop the error context stack */
- error_context_stack = errcontext.previous;
- }
- if (LocalRefCount[i] > 0)
- elog(ERROR, "FlushRelationBuffers(\"%s\" (local), %u): block %u is referenced (%d)",
- RelationGetRelationName(rel), firstDelBlock,
- bufHdr->tag.blockNum, LocalRefCount[i]);
- if (bufHdr->tag.blockNum >= firstDelBlock)
- {
- CLEAR_BUFFERTAG(bufHdr->tag);
- bufHdr->flags = 0;
- bufHdr->usage_count = 0;
- }
+ /* Pop the error context stack */
+ error_context_stack = errcontext.previous;
}
}
for (i = 0; i < NBuffers; i++)
{
bufHdr = &BufferDescriptors[i];
- recheck:
LockBufHdr(bufHdr);
- if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
+ if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
+ (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
- if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
- {
- PinBuffer_Locked(bufHdr);
- LWLockAcquire(bufHdr->content_lock, LW_SHARED);
- FlushBuffer(bufHdr, rel->rd_smgr);
- LWLockRelease(bufHdr->content_lock);
- UnpinBuffer(bufHdr, true, false /* no freelist change */ );
- /*
- * As soon as we unpin, it's possible for someone to take
- * the buffer away from us; so loop back to re-lock and
- * re-check if it still belongs to the target relation.
- */
- goto recheck;
- }
- /*
- * Even though it's not dirty, it could still be pinned because
- * TerminateIO and UnpinBuffer are separate actions. Hence,
- * we can't error out on nonzero reference count here.
- */
- if (bufHdr->tag.blockNum >= firstDelBlock)
- InvalidateBuffer(bufHdr); /* releases spinlock */
- else
- UnlockBufHdr(bufHdr);
+ PinBuffer_Locked(bufHdr);
+ LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+ FlushBuffer(bufHdr, rel->rd_smgr);
+ LWLockRelease(bufHdr->content_lock);
+ UnpinBuffer(bufHdr, true, false /* no freelist change */ );
}
else
UnlockBufHdr(bufHdr);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.85 2005/01/10 20:02:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.86 2005/03/20 22:00:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
{
/*
- * Get rid of any leftover buffers for the rel (shouldn't be any in
- * the commit case, but there can be in the abort case).
+ * Get rid of any remaining buffers for the relation. bufmgr will just
+ * drop them without bothering to write the contents.
*/
DropRelFileNodeBuffers(rnode, isTemp, 0);
{
BlockNumber newblks;
+ /*
+ * Get rid of any buffers for the about-to-be-deleted blocks.
+ * bufmgr will just drop them without bothering to write the contents.
+ */
+ DropRelFileNodeBuffers(reln->smgr_rnode, isTemp, nblocks);
+
/*
* Tell the free space map to forget anything it may have stored for
* the about-to-be-deleted blocks. We want to be sure it won't return
reln = smgropen(xlrec->rnode);
+ /* Can't use smgrtruncate because it would try to xlog */
+
/*
* First, force bufmgr to drop any buffers it has for the to-be-
* truncated blocks. We must do this, else subsequent
*/
DropRelFileNodeBuffers(xlrec->rnode, false, xlrec->blkno);
- /* Can't use smgrtruncate because it would try to xlog */
-
/*
* Tell the free space map to forget anything it may have stored
* for the about-to-be-deleted blocks. We want to be sure it
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.92 2005/03/19 23:27:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.93 2005/03/20 22:00:54 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern BlockNumber BufferGetBlockNumber(Buffer buffer);
extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
extern void RelationTruncate(Relation rel, BlockNumber nblocks);
-extern void FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock);
-extern void DropRelationBuffers(Relation rel);
+extern void FlushRelationBuffers(Relation rel);
extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
BlockNumber firstDelBlock);
extern void DropBuffers(Oid dbid);