Scan the buffer pool just once, not once per fork, during relation drop.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Jun 2012 21:42:27 +0000 (17:42 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 7 Jun 2012 21:43:11 +0000 (17:43 -0400)
This provides a speedup of about 4X when NBuffers is large enough.
There is also a useful reduction in sinval traffic, since we
only do CacheInvalidateSmgr() once, not once per fork.

Simon Riggs, reviewed and somewhat revised by Tom Lane

src/backend/access/transam/twophase.c
src/backend/access/transam/xact.c
src/backend/catalog/storage.c
src/backend/storage/buffer/bufmgr.c
src/backend/storage/buffer/localbuf.c
src/backend/storage/smgr/smgr.c
src/include/storage/buf_internals.h
src/include/storage/bufmgr.h
src/include/storage/smgr.h

index 6db46c00109277fdbe666bbd269bb293e6e8553a..0b41a76a3251461e3c53ed0d33ae8789a7c0d22f 100644 (file)
@@ -1356,12 +1356,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
        for (i = 0; i < ndelrels; i++)
        {
                SMgrRelation srel = smgropen(delrels[i], InvalidBackendId);
-               ForkNumber      fork;
 
-               for (fork = 0; fork <= MAX_FORKNUM; fork++)
-               {
-                       smgrdounlink(srel, fork, false);
-               }
+               smgrdounlink(srel, false);
                smgrclose(srel);
        }
 
index 49c14cb64c02a39cac4e6d4dfbcdeb015bcc2a53..659b53524cdf71dd8d36e1723f09838c49c82e1c 100644 (file)
@@ -4638,10 +4638,8 @@ xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
                ForkNumber      fork;
 
                for (fork = 0; fork <= MAX_FORKNUM; fork++)
-               {
                        XLogDropRelation(xnodes[i], fork);
-                       smgrdounlink(srel, fork, true);
-               }
+               smgrdounlink(srel, true);
                smgrclose(srel);
        }
 
@@ -4778,10 +4776,8 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
                ForkNumber      fork;
 
                for (fork = 0; fork <= MAX_FORKNUM; fork++)
-               {
                        XLogDropRelation(xlrec->xnodes[i], fork);
-                       smgrdounlink(srel, fork, true);
-               }
+               smgrdounlink(srel, true);
                smgrclose(srel);
        }
 }
index a017101b7664c73e468f62e67f4c1ab1d65476a7..97ca95b6c8d37b35cc3d4fa1085480a3375940de 100644 (file)
@@ -356,13 +356,9 @@ smgrDoPendingDeletes(bool isCommit)
                        if (pending->atCommit == isCommit)
                        {
                                SMgrRelation srel;
-                               int                     i;
 
                                srel = smgropen(pending->relnode, pending->backend);
-                               for (i = 0; i <= MAX_FORKNUM; i++)
-                               {
-                                       smgrdounlink(srel, i, false);
-                               }
+                               smgrdounlink(srel, false);
                                smgrclose(srel);
                        }
                        /* must explicitly free the list entry */
index b178eee22145c66d2f870c57c8b76724a0b0c170..d46faaf958d4310da39522c4ada193bedfd62df1 100644 (file)
@@ -2020,7 +2020,7 @@ BufferIsPermanent(Buffer buffer)
  *             DropRelFileNodeBuffers
  *
  *             This function removes from the buffer pool all the pages of the
- *             specified relation that have block numbers >= firstDelBlock.
+ *             specified relation fork that have block numbers >= firstDelBlock.
  *             (In particular, with firstDelBlock = 0, all pages are removed.)
  *             Dirty pages are simply dropped, without bothering to write them
  *             out first.      Therefore, this is NOT rollback-able, and so should be
@@ -2089,6 +2089,46 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
        }
 }
 
+/* ---------------------------------------------------------------------
+ *             DropRelFileNodeAllBuffers
+ *
+ *             This function removes from the buffer pool all the pages of all
+ *             forks of the specified relation.  It's equivalent to calling
+ *             DropRelFileNodeBuffers once per fork with firstDelBlock = 0.
+ * --------------------------------------------------------------------
+ */
+void
+DropRelFileNodeAllBuffers(RelFileNodeBackend rnode)
+{
+       int                     i;
+
+       /* If it's a local relation, it's localbuf.c's problem. */
+       if (rnode.backend != InvalidBackendId)
+       {
+               if (rnode.backend == MyBackendId)
+                       DropRelFileNodeAllLocalBuffers(rnode.node);
+               return;
+       }
+
+       for (i = 0; i < NBuffers; i++)
+       {
+               volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+
+               /*
+                * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
+                * and saves some cycles.
+                */
+               if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+                       continue;
+
+               LockBufHdr(bufHdr);
+               if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+                       InvalidateBuffer(bufHdr);       /* releases spinlock */
+               else
+                       UnlockBufHdr(bufHdr);
+       }
+}
+
 /* ---------------------------------------------------------------------
  *             DropDatabaseBuffers
  *
index 63c14f7300cc48d5d25be991a28c3cf80bed54f1..46eeaf742d864c7193831126d4604bbdcd3743f2 100644 (file)
@@ -330,6 +330,46 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
        }
 }
 
+/*
+ * DropRelFileNodeAllLocalBuffers
+ *             This function removes from the buffer pool all pages of all forks
+ *             of the specified relation.
+ *
+ *             See DropRelFileNodeAllBuffers in bufmgr.c for more notes.
+ */
+void
+DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
+{
+       int                     i;
+
+       for (i = 0; i < NLocBuffer; i++)
+       {
+               BufferDesc *bufHdr = &LocalBufferDescriptors[i];
+               LocalBufferLookupEnt *hresult;
+
+               if ((bufHdr->flags & BM_TAG_VALID) &&
+                       RelFileNodeEquals(bufHdr->tag.rnode, rnode))
+               {
+                       if (LocalRefCount[i] != 0)
+                               elog(ERROR, "block %u of %s is still referenced (local %u)",
+                                        bufHdr->tag.blockNum,
+                                        relpathbackend(bufHdr->tag.rnode, MyBackendId,
+                                                                       bufHdr->tag.forkNum),
+                                        LocalRefCount[i]);
+                       /* Remove entry from hashtable */
+                       hresult = (LocalBufferLookupEnt *)
+                               hash_search(LocalBufHash, (void *) &bufHdr->tag,
+                                                       HASH_REMOVE, NULL);
+                       if (!hresult)           /* shouldn't happen */
+                               elog(ERROR, "local buffer hash table corrupted");
+                       /* Mark buffer invalid */
+                       CLEAR_BUFFERTAG(bufHdr->tag);
+                       bufHdr->flags = 0;
+                       bufHdr->usage_count = 0;
+               }
+       }
+}
+
 /*
  * InitLocalBuffers -
  *       init the local buffer cache. Since most queries (esp. multi-user ones)
index 5f87543bb0331470b956d56f9512377d4d4e8681..60eb81f774f280d378dd9b04acc8a69c1f50be5c 100644 (file)
@@ -329,7 +329,64 @@ smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 }
 
 /*
- *     smgrdounlink() -- Immediately unlink a relation.
+ *     smgrdounlink() -- Immediately unlink all forks of a relation.
+ *
+ *             All forks of the relation are removed from the store.  This should
+ *             not be used during transactional operations, since it can't be undone.
+ *
+ *             If isRedo is true, it is okay for the underlying file(s) to be gone
+ *             already.
+ *
+ *             This is equivalent to calling smgrdounlinkfork for each fork, but
+ *             it's significantly quicker so should be preferred when possible.
+ */
+void
+smgrdounlink(SMgrRelation reln, bool isRedo)
+{
+       RelFileNodeBackend rnode = reln->smgr_rnode;
+       int                     which = reln->smgr_which;
+       ForkNumber      forknum;
+
+       /* Close the forks at smgr level */
+       for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+               (*(smgrsw[which].smgr_close)) (reln, forknum);
+
+       /*
+        * Get rid of any remaining buffers for the relation.  bufmgr will just
+        * drop them without bothering to write the contents.
+        */
+       DropRelFileNodeAllBuffers(rnode);
+
+       /*
+        * It'd be nice to tell the stats collector to forget it immediately, too.
+        * But we can't because we don't know the OID (and in cases involving
+        * relfilenode swaps, it's not always clear which table OID to forget,
+        * anyway).
+        */
+
+       /*
+        * Send a shared-inval message to force other backends to close any
+        * dangling smgr references they may have for this rel.  We should do this
+        * before starting the actual unlinking, in case we fail partway through
+        * that step.  Note that the sinval message will eventually come back to
+        * this backend, too, and thereby provide a backstop that we closed our
+        * own smgr rel.
+        */
+       CacheInvalidateSmgr(rnode);
+
+       /*
+        * Delete the physical file(s).
+        *
+        * Note: smgr_unlink must treat deletion failure as a WARNING, not an
+        * ERROR, because we've already decided to commit or abort the current
+        * xact.
+        */
+       for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+               (*(smgrsw[which].smgr_unlink)) (rnode, forknum, isRedo);
+}
+
+/*
+ *     smgrdounlinkfork() -- Immediately unlink one fork of a relation.
  *
  *             The specified fork of the relation is removed from the store.  This
  *             should not be used during transactional operations, since it can't be
@@ -339,16 +396,16 @@ smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
  *             already.
  */
 void
-smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isRedo)
+smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 {
        RelFileNodeBackend rnode = reln->smgr_rnode;
        int                     which = reln->smgr_which;
 
-       /* Close the fork */
+       /* Close the fork at smgr level */
        (*(smgrsw[which].smgr_close)) (reln, forknum);
 
        /*
-        * Get rid of any remaining buffers for the relation.  bufmgr will just
+        * Get rid of any remaining buffers for the fork.  bufmgr will just
         * drop them without bothering to write the contents.
         */
        DropRelFileNodeBuffers(rnode, forknum, 0);
index 4129ce524425a4d8253efed3fd665c807a600028..43f83d1eccc0c91d26bc61af0e73a5d6eb933e6d 100644 (file)
@@ -210,6 +210,7 @@ extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
 extern void MarkLocalBufferDirty(Buffer buffer);
 extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
                                                        BlockNumber firstDelBlock);
+extern void DropRelFileNodeAllLocalBuffers(RelFileNode rnode);
 extern void AtEOXact_LocalBuffers(bool isCommit);
 
 #endif   /* BUFMGR_INTERNALS_H */
index 17fc7cb9420b878a1f9f80260c72a5a908a573b5..51eb77b689566712293e805edd462668f4cafb41 100644 (file)
@@ -188,6 +188,7 @@ extern void FlushRelationBuffers(Relation rel);
 extern void FlushDatabaseBuffers(Oid dbid);
 extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
                                           ForkNumber forkNum, BlockNumber firstDelBlock);
+extern void DropRelFileNodeAllBuffers(RelFileNodeBackend rnode);
 extern void DropDatabaseBuffers(Oid dbid);
 
 #define RelationGetNumberOfBlocks(reln) \
index 46c84024fa2b706a747295a40991c0b99a911fdd..f1e1b8cdefd2d27dde164a945c2b2e9bb04717b1 100644 (file)
@@ -80,8 +80,8 @@ extern void smgrclose(SMgrRelation reln);
 extern void smgrcloseall(void);
 extern void smgrclosenode(RelFileNodeBackend rnode);
 extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
-extern void smgrdounlink(SMgrRelation reln, ForkNumber forknum,
-                        bool isRedo);
+extern void smgrdounlink(SMgrRelation reln, bool isRedo);
+extern void smgrdounlinkfork(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
                   BlockNumber blocknum, char *buffer, bool skipFsync);
 extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,