author	Tom Lane <tgl@sss.pgh.pa.us>	Wed, 30 May 2007 20:12:03 +0000 (20:12 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>	Wed, 30 May 2007 20:12:03 +0000 (20:12 +0000)

Make large sequential scans and VACUUMs work in a limited-size "ring" of
buffers, rather than blowing out the whole shared-buffer arena.  Aside from
avoiding cache spoliation, this fixes the problem that VACUUM formerly tended
to cause a WAL flush for every page it modified, because we had it hacked to
use only a single buffer.  Those flushes will now occur only once per
ring-ful.  The exact ring size, and the threshold for seqscans to switch into
the ring usage pattern, remain under debate; but the infrastructure seems
done.  The key bit of infrastructure is a new optional BufferAccessStrategy
object that can be passed to ReadBuffer operations; this replaces the former
StrategyHintVacuum API.
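
For reference, a minimal sketch of how a caller drives the new object.  The
function below is hypothetical, but every call in it (GetAccessStrategy,
ReadBufferWithStrategy, ReleaseBuffer, FreeAccessStrategy) is introduced or
touched by this patch; it compiles only inside the backend:

/* Hypothetical caller -- assumes the usual backend headers. */
#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

static void
scan_with_ring(Relation rel, BlockNumber nblocks)
{
	/* Allocate a small ring suited to vacuum-style access */
	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_VACUUM);
	BlockNumber blkno;

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		/* Reads recycle buffers within the ring, not the whole arena */
		Buffer		buf = ReadBufferWithStrategy(rel, blkno, bstrategy);

		/* ... lock, examine, possibly modify the page here ... */

		ReleaseBuffer(buf);
	}

	FreeAccessStrategy(bstrategy);
}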

This patch also changes the buffer usage-count methodology a bit: we now
advance usage_count when first pinning a buffer, rather than when last
unpinning it.  To preserve the behavior that a buffer's lifetime starts to
decrease when it's released, the clock sweep code is modified to not decrement
usage_count of pinned buffers.
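
The revised rule is easy to see in a toy, self-contained simulation (the
names and sizes below are invented for illustration; the backend's real
PinBuffer and StrategyGetBuffer are more involved):

#include <stdio.h>

#define NBUFS		8
#define MAX_USAGE	5

typedef struct { int refcount; int usage_count; } BufDesc;

static BufDesc bufs[NBUFS];
static int sweep = 0;

/* usage_count now advances at (first) pin time, not at last unpin */
static void
pin(int i)
{
	if (bufs[i].refcount++ == 0 && bufs[i].usage_count < MAX_USAGE)
		bufs[i].usage_count++;
}

static void
unpin(int i)
{
	bufs[i].refcount--;		/* no usage_count change here anymore */
}

/* Clock sweep: pinned buffers are skipped without being decremented,
 * so a buffer's lifetime starts to decrease only once it is released.
 * (Assumes at least one buffer is unpinned, else this loops forever.) */
static int
get_victim(void)
{
	for (;;)
	{
		BufDesc    *b = &bufs[sweep];

		sweep = (sweep + 1) % NBUFS;
		if (b->refcount > 0)
			continue;
		if (b->usage_count == 0)
			return (int) (b - bufs);
		b->usage_count--;
	}
}

int
main(void)
{
	pin(3);				/* stays pinned: never chosen, never aged */
	pin(5);
	unpin(5);			/* usage_count 1: survives one sweep pass */
	printf("victim: %d\n", get_victim());
	return 0;
}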

Work not done in this commit: teach GiST and GIN indexes to use the vacuum
BufferAccessStrategy for vacuum-driven fetches.

Original patch by Simon, reworked by Heikki and again by Tom.

24 files changed:
src/backend/access/hash/hash.c
src/backend/access/hash/hashovfl.c
src/backend/access/hash/hashpage.c
src/backend/access/heap/heapam.c
src/backend/access/nbtree/nbtree.c
src/backend/access/transam/xlog.c
src/backend/catalog/index.c
src/backend/commands/analyze.c
src/backend/commands/vacuum.c
src/backend/commands/vacuumlazy.c
src/backend/postmaster/autovacuum.c
src/backend/storage/buffer/README
src/backend/storage/buffer/bufmgr.c
src/backend/storage/buffer/freelist.c
src/backend/storage/buffer/localbuf.c
src/backend/tcop/utility.c
src/include/access/genam.h
src/include/access/hash.h
src/include/access/relscan.h
src/include/access/xlog.h
src/include/commands/vacuum.h
src/include/storage/buf.h
src/include/storage/buf_internals.h
src/include/storage/bufmgr.h

index 3d9b8064fc111873dc208f8556ad1f3f7a1e60e8..57c54224719f8fbc33d76d44082ae4f16b3533bb 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.94 2007/05/03 16:45:58 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.95 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *       This file contains only the public interface routines.
@@ -547,8 +547,9 @@ loop_top:
 
                        vacuum_delay_point();
 
-                       buf = _hash_getbuf(rel, blkno, HASH_WRITE,
-                                                          LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+                       buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
+                                                                                        LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+                                                                                        info->strategy);
                        page = BufferGetPage(buf);
                        opaque = (HashPageOpaque) PageGetSpecialPointer(page);
                        Assert(opaque->hasho_bucket == cur_bucket);
@@ -596,7 +597,8 @@ loop_top:
 
                /* If we deleted anything, try to compact free space */
                if (bucket_dirty)
-                       _hash_squeezebucket(rel, cur_bucket, bucket_blkno);
+                       _hash_squeezebucket(rel, cur_bucket, bucket_blkno,
+                                                               info->strategy);
 
                /* Release bucket lock */
                _hash_droplock(rel, bucket_blkno, HASH_EXCLUSIVE);
index 1f71f18b7c68af4dd378842a8f151b23009c754f..889bbcdb1a9ea660f9f1119e813b759277ec1d86 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.57 2007/05/03 16:45:58 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.58 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *       Overflow pages look like ordinary relation pages.
@@ -362,6 +362,9 @@ _hash_firstfreebit(uint32 map)
  *     Remove this overflow page from its bucket's chain, and mark the page as
  *     free.  On entry, ovflbuf is write-locked; it is released before exiting.
  *
+ *     Since this function is invoked in VACUUM, we provide an access strategy
+ *     parameter that controls fetches of the bucket pages.
+ *
  *     Returns the block number of the page that followed the given page
  *     in the bucket, or InvalidBlockNumber if no following page.
  *
@@ -370,7 +373,8 @@ _hash_firstfreebit(uint32 map)
  *     on the bucket, too.
  */
 BlockNumber
-_hash_freeovflpage(Relation rel, Buffer ovflbuf)
+_hash_freeovflpage(Relation rel, Buffer ovflbuf,
+                                  BufferAccessStrategy bstrategy)
 {
        HashMetaPage metap;
        Buffer          metabuf;
@@ -413,8 +417,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
         */
        if (BlockNumberIsValid(prevblkno))
        {
-               Buffer          prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE,
-                                                                                  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+               Buffer          prevbuf = _hash_getbuf_with_strategy(rel,
+                                                                                                                prevblkno,
+                                                                                                                HASH_WRITE,
+                                                                                                                LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+                                                                                                                bstrategy);
                Page            prevpage = BufferGetPage(prevbuf);
                HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);
 
@@ -424,8 +431,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
        }
        if (BlockNumberIsValid(nextblkno))
        {
-               Buffer          nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE,
-                                                                                  LH_OVERFLOW_PAGE);
+               Buffer          nextbuf = _hash_getbuf_with_strategy(rel,
+                                                                                                                nextblkno,
+                                                                                                                HASH_WRITE,
+                                                                                                                LH_OVERFLOW_PAGE,
+                                                                                                                bstrategy);
                Page            nextpage = BufferGetPage(nextbuf);
                HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);
 
@@ -434,6 +444,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
                _hash_wrtbuf(rel, nextbuf);
        }
 
+       /* Note: bstrategy is intentionally not used for metapage and bitmap */
+
        /* Read the metapage so we can determine which bitmap page to use */
        metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
        metap = (HashMetaPage) BufferGetPage(metabuf);
@@ -558,11 +570,15 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
  *
  *     Caller must hold exclusive lock on the target bucket.  This allows
  *     us to safely lock multiple pages in the bucket.
+ *
+ *     Since this function is invoked in VACUUM, we provide an access strategy
+ *     parameter that controls fetches of the bucket pages.
  */
 void
 _hash_squeezebucket(Relation rel,
                                        Bucket bucket,
-                                       BlockNumber bucket_blkno)
+                                       BlockNumber bucket_blkno,
+                                       BufferAccessStrategy bstrategy)
 {
        Buffer          wbuf;
        Buffer          rbuf = 0;
@@ -581,7 +597,11 @@ _hash_squeezebucket(Relation rel,
         * start squeezing into the base bucket page.
         */
        wblkno = bucket_blkno;
-       wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_BUCKET_PAGE);
+       wbuf = _hash_getbuf_with_strategy(rel,
+                                                                         wblkno,
+                                                                         HASH_WRITE,
+                                                                         LH_BUCKET_PAGE,
+                                                                         bstrategy);
        wpage = BufferGetPage(wbuf);
        wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
 
@@ -595,8 +615,10 @@ _hash_squeezebucket(Relation rel,
        }
 
        /*
-        * find the last page in the bucket chain by starting at the base bucket
-        * page and working forward.
+        * Find the last page in the bucket chain by starting at the base bucket
+        * page and working forward.  Note: we assume that a hash bucket chain is
+        * usually smaller than the buffer ring being used by VACUUM, else using
+        * the access strategy here would be counterproductive.
         */
        ropaque = wopaque;
        do
@@ -604,7 +626,11 @@ _hash_squeezebucket(Relation rel,
                rblkno = ropaque->hasho_nextblkno;
                if (ropaque != wopaque)
                        _hash_relbuf(rel, rbuf);
-               rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+               rbuf = _hash_getbuf_with_strategy(rel,
+                                                                                 rblkno,
+                                                                                 HASH_WRITE,
+                                                                                 LH_OVERFLOW_PAGE,
+                                                                                 bstrategy);
                rpage = BufferGetPage(rbuf);
                ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
                Assert(ropaque->hasho_bucket == bucket);
@@ -644,7 +670,11 @@ _hash_squeezebucket(Relation rel,
                                        return;
                                }
 
-                               wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+                               wbuf = _hash_getbuf_with_strategy(rel,
+                                                                                                 wblkno,
+                                                                                                 HASH_WRITE,
+                                                                                                 LH_OVERFLOW_PAGE,
+                                                                                                 bstrategy);
                                wpage = BufferGetPage(wbuf);
                                wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
                                Assert(wopaque->hasho_bucket == bucket);
@@ -688,15 +718,19 @@ _hash_squeezebucket(Relation rel,
                                /* yes, so release wbuf lock first */
                                _hash_wrtbuf(rel, wbuf);
                                /* free this overflow page (releases rbuf) */
-                               _hash_freeovflpage(rel, rbuf);
+                               _hash_freeovflpage(rel, rbuf, bstrategy);
                                /* done */
                                return;
                        }
 
                        /* free this overflow page, then get the previous one */
-                       _hash_freeovflpage(rel, rbuf);
+                       _hash_freeovflpage(rel, rbuf, bstrategy);
 
-                       rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+                       rbuf = _hash_getbuf_with_strategy(rel,
+                                                                                         rblkno,
+                                                                                         HASH_WRITE,
+                                                                                         LH_OVERFLOW_PAGE,
+                                                                                         bstrategy);
                        rpage = BufferGetPage(rbuf);
                        ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
                        Assert(ropaque->hasho_bucket == bucket);
index a27d83d4ffff3b772f730fee35fa791825e7395c..29d861efb868ce85f14d759c3978945e5f87a732 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.67 2007/05/03 16:45:58 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.68 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *       Postgres hash pages look like ordinary relation pages.  The opaque
@@ -214,6 +214,34 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
        return buf;
 }
 
+/*
+ *     _hash_getbuf_with_strategy() -- Get a buffer with nondefault strategy.
+ *
+ *             This is identical to _hash_getbuf() but also allows a buffer access
+ *             strategy to be specified.  We use this for VACUUM operations.
+ */
+Buffer
+_hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
+                                                  int access, int flags,
+                                                  BufferAccessStrategy bstrategy)
+{
+       Buffer          buf;
+
+       if (blkno == P_NEW)
+               elog(ERROR, "hash AM does not use P_NEW");
+
+       buf = ReadBufferWithStrategy(rel, blkno, bstrategy);
+
+       if (access != HASH_NOLOCK)
+               LockBuffer(buf, access);
+
+       /* ref count and lock type are correct */
+
+       _hash_checkpage(rel, buf, flags);
+
+       return buf;
+}
+
 /*
  *     _hash_relbuf() -- release a locked buffer.
  *
@@ -840,5 +868,5 @@ _hash_splitbucket(Relation rel,
        _hash_wrtbuf(rel, obuf);
        _hash_wrtbuf(rel, nbuf);
 
-       _hash_squeezebucket(rel, obucket, start_oblkno);
+       _hash_squeezebucket(rel, obucket, start_oblkno, NULL);
 }
index 9edeaff130686ade0f026a977095d9440b0b25a4..0b20e5e9a8db2671ee9331a20bfcfc868aeea588 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.233 2007/05/27 03:50:38 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.234 2007/05/30 20:11:53 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -83,6 +83,24 @@ initscan(HeapScanDesc scan, ScanKey key)
         */
        scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
 
+       /*
+        * If the table is large relative to NBuffers, use a bulk-read access
+        * strategy, else use the default random-access strategy.  During a
+        * rescan, don't make a new strategy object if we don't have to.
+        */
+       if (scan->rs_nblocks > NBuffers / 4 &&
+               !scan->rs_rd->rd_istemp)
+       {
+               if (scan->rs_strategy == NULL)
+                       scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
+       }
+       else
+       {
+               if (scan->rs_strategy != NULL)
+                       FreeAccessStrategy(scan->rs_strategy);
+               scan->rs_strategy = NULL;
+       }
+
        scan->rs_inited = false;
        scan->rs_ctup.t_data = NULL;
        ItemPointerSetInvalid(&scan->rs_ctup.t_self);
@@ -123,9 +141,17 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)
 
        Assert(page < scan->rs_nblocks);
 
-       scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
-                                                                                scan->rs_rd,
-                                                                                page);
+       /* release previous scan buffer, if any */
+       if (BufferIsValid(scan->rs_cbuf))
+       {
+               ReleaseBuffer(scan->rs_cbuf);
+               scan->rs_cbuf = InvalidBuffer;
+       }
+
+       /* read page using selected strategy */
+       scan->rs_cbuf = ReadBufferWithStrategy(scan->rs_rd,
+                                                                                  page,
+                                                                                  scan->rs_strategy);
        scan->rs_cblock = page;
 
        if (!scan->rs_pageatatime)
@@ -938,6 +964,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
        scan->rs_rd = relation;
        scan->rs_snapshot = snapshot;
        scan->rs_nkeys = nkeys;
+       scan->rs_strategy = NULL;       /* set in initscan */
 
        /*
         * we can use page-at-a-time mode if it's an MVCC-safe snapshot
@@ -1007,6 +1034,9 @@ heap_endscan(HeapScanDesc scan)
        if (scan->rs_key)
                pfree(scan->rs_key);
 
+       if (scan->rs_strategy != NULL)
+               FreeAccessStrategy(scan->rs_strategy);
+
        pfree(scan);
 }
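
The threshold logic in initscan above reads naturally as a small pure
function; a self-contained stand-in (the types and allocator here are
invented, not backend code):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct { int ring_size; } Strategy;	/* stand-in object */

static Strategy *
make_bulkread_strategy(void)
{
	Strategy   *s = malloc(sizeof(Strategy));

	s->ring_size = 32;		/* 256KB / 8KB pages */
	return s;
}

/*
 * Big, non-temp tables get a bulk-read ring; on a rescan an existing
 * ring is kept rather than reallocated; small tables fall back to the
 * default (NULL) strategy.
 */
static Strategy *
choose_scan_strategy(Strategy *current, long nblocks, long nbuffers,
					 bool is_temp)
{
	if (nblocks > nbuffers / 4 && !is_temp)
	{
		if (current == NULL)
			current = make_bulkread_strategy();
	}
	else
	{
		free(current);			/* free(NULL) is a no-op */
		current = NULL;
	}
	return current;
}

int
main(void)
{
	Strategy   *s = choose_scan_strategy(NULL, 100000, 16384, false);

	printf("bulk read ring: %s\n", s ? "yes" : "no");
	free(s);
	return 0;
}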
 
index 87f848550556d6e1a638d10932bb4aecb2b5a9f3..a4ba3d3cdf42fbaa31158441abdd2d917160f66c 100644 (file)
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.154 2007/01/05 22:19:23 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.155 2007/05/30 20:11:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -786,9 +786,10 @@ restart:
        /*
         * We can't use _bt_getbuf() here because it always applies
         * _bt_checkpage(), which will barf on an all-zero page. We want to
-        * recycle all-zero pages, not fail.
+        * recycle all-zero pages, not fail.  Also, we want to use a nondefault
+        * buffer access strategy.
         */
-       buf = ReadBuffer(rel, blkno);
+       buf = ReadBufferWithStrategy(rel, blkno, info->strategy);
        LockBuffer(buf, BT_READ);
        page = BufferGetPage(buf);
        opaque = (BTPageOpaque) PageGetSpecialPointer(page);
index 3dc00499bfb441c8bd79391910dc511963a08a09..4ca4aa754c6d560b4e59c9522856e3f1f6aafe48 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.269 2007/05/20 21:08:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.270 2007/05/30 20:11:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1799,6 +1799,36 @@ XLogFlush(XLogRecPtr record)
                         LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
 }
 
+/*
+ * Test whether XLOG data has been flushed up to (at least) the given position.
+ *
+ * Returns true if a flush is still needed.  (It may be that someone else
+ * is already in process of flushing that far, however.)
+ */
+bool
+XLogNeedsFlush(XLogRecPtr record)
+{
+       /* Quick exit if already known flushed */
+       if (XLByteLE(record, LogwrtResult.Flush))
+               return false;
+
+       /* read LogwrtResult and update local state */
+       {
+               /* use volatile pointer to prevent code rearrangement */
+               volatile XLogCtlData *xlogctl = XLogCtl;
+
+               SpinLockAcquire(&xlogctl->info_lck);
+               LogwrtResult = xlogctl->LogwrtResult;
+               SpinLockRelease(&xlogctl->info_lck);
+       }
+
+       /* check again */
+       if (XLByteLE(record, LogwrtResult.Flush))
+               return false;
+
+       return true;
+}
+
 /*
  * Create a new XLOG file segment, or open a pre-existing one.
  *
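
XLogNeedsFlush above follows a check/refresh/recheck shape: consult the
backend-local copy of the shared flush pointer first, and take the
spinlock only if that fast path fails.  A toy standalone rendering of the
same shape (a pthread mutex standing in for the spinlock, and a plain
integer for XLogRecPtr):

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t info_lck = PTHREAD_MUTEX_INITIALIZER;
static uint64_t shared_flush_ptr = 0;	/* protected by info_lck */
static uint64_t local_flush_ptr = 0;	/* this process's cached copy */

/* Returns true if a flush is still needed to reach "record". */
static bool
needs_flush(uint64_t record)
{
	/* Quick exit if already known flushed */
	if (record <= local_flush_ptr)
		return false;

	/* Refresh the cached copy under the lock, then check again */
	pthread_mutex_lock(&info_lck);
	local_flush_ptr = shared_flush_ptr;
	pthread_mutex_unlock(&info_lck);

	return record > local_flush_ptr;
}

int
main(void)
{
	shared_flush_ptr = 100;
	printf("%d %d\n", needs_flush(50), needs_flush(200));	/* 0 1 */
	return 0;
}
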
index bd3ae31b076c4fc8c512fdf495fade6fc42ebcef..9aa58e35f9a5d36052f8197e0a0e588e0a045e57 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.283 2007/05/16 17:28:20 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.284 2007/05/30 20:11:55 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1658,6 +1658,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
        ivinfo.vacuum_full = false;
        ivinfo.message_level = DEBUG2;
        ivinfo.num_heap_tuples = -1;
+       ivinfo.strategy = NULL;
 
        state.tuplesort = tuplesort_begin_datum(TIDOID,
                                                                                        TIDLessOperator, false,
index 2754a6db6a2230ee5563d21744c49b12f51fcbc6..d77aec2dd74f4728cee1b2e0758ef1503374e3b3 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.107 2007/04/30 03:23:48 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.108 2007/05/30 20:11:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,10 +63,13 @@ typedef struct AnlIndexData
 /* Default statistics target (GUC parameter) */
 int                    default_statistics_target = 10;
 
+/* A few variables that don't seem worth passing around as parameters */
 static int     elevel = -1;
 
 static MemoryContext anl_context = NULL;
 
+static BufferAccessStrategy vac_strategy;
+
 
 static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
                                  int samplesize);
@@ -94,7 +97,8 @@ static bool std_typanalyze(VacAttrStats *stats);
  *     analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, VacuumStmt *vacstmt)
+analyze_rel(Oid relid, VacuumStmt *vacstmt,
+                       BufferAccessStrategy bstrategy)
 {
        Relation        onerel;
        int                     attr_cnt,
@@ -120,6 +124,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
        else
                elevel = DEBUG2;
 
+       vac_strategy = bstrategy;
+
        /*
         * Use the current context for storing analysis info.  vacuum.c ensures
         * that this context will be cleared when I return, thus releasing the
@@ -845,7 +851,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
                 * looking at it.  We don't maintain a lock on the page, so tuples
                 * could get added to it, but we ignore such tuples.
                 */
-               targbuffer = ReadBuffer(onerel, targblock);
+               targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
                LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
                targpage = BufferGetPage(targbuffer);
                maxoffset = PageGetMaxOffsetNumber(targpage);
index 93885579cbb39a49e7e99937c4a24ad3ecfaa192..cf4c34141299b68e63a3be992325896a2e6cfa7d 100644 (file)
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.351 2007/05/17 15:28:29 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.352 2007/05/30 20:11:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -191,6 +191,7 @@ ExecContext_Finish(ExecContext ec)
  *----------------------------------------------------------------------
  */
 
+/* A few variables that don't seem worth passing around as parameters */
 static MemoryContext vac_context = NULL;
 
 static int     elevel = -1;
@@ -198,6 +199,8 @@ static int  elevel = -1;
 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;
 
+static BufferAccessStrategy vac_strategy;
+
 
 /* non-export function prototypes */
 static List *get_rel_oids(List *relids, const RangeVar *vacrel,
@@ -257,14 +260,18 @@ static Size PageGetFreeSpaceWithFillFactor(Relation relation, Page page);
  * relation OIDs to be processed, and vacstmt->relation is ignored.
  * (The non-NIL case is currently only used by autovacuum.)
  *
+ * bstrategy is normally given as NULL, but in autovacuum it can be passed
+ * in to use the same buffer strategy object across multiple vacuum() calls.
+ *
  * isTopLevel should be passed down from ProcessUtility.
  *
- * It is the caller's responsibility that both vacstmt and relids
+ * It is the caller's responsibility that vacstmt, relids, and bstrategy
  * (if given) be allocated in a memory context that won't disappear
  * at transaction commit.
  */
 void
-vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
+vacuum(VacuumStmt *vacstmt, List *relids,
+          BufferAccessStrategy bstrategy, bool isTopLevel)
 {
        const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
        volatile MemoryContext anl_context = NULL;
@@ -319,6 +326,19 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
                                                                                ALLOCSET_DEFAULT_INITSIZE,
                                                                                ALLOCSET_DEFAULT_MAXSIZE);
 
+       /*
+        * If caller didn't give us a buffer strategy object, make one in the
+        * cross-transaction memory context.
+        */
+       if (bstrategy == NULL)
+       {
+               MemoryContext old_context = MemoryContextSwitchTo(vac_context);
+
+               bstrategy = GetAccessStrategy(BAS_VACUUM);
+               MemoryContextSwitchTo(old_context);
+       }
+       vac_strategy = bstrategy;
+
        /* Remember whether we are processing everything in the DB */
        all_rels = (relids == NIL && vacstmt->relation == NULL);
 
@@ -417,15 +437,7 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
                                else
                                        old_context = MemoryContextSwitchTo(anl_context);
 
-                               /*
-                                * Tell the buffer replacement strategy that vacuum is causing
-                                * the IO
-                                */
-                               StrategyHintVacuum(true);
-
-                               analyze_rel(relid, vacstmt);
-
-                               StrategyHintVacuum(false);
+                               analyze_rel(relid, vacstmt, vac_strategy);
 
                                if (use_own_xacts)
                                        CommitTransactionCommand();
@@ -441,8 +453,6 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
        {
                /* Make sure cost accounting is turned off after error */
                VacuumCostActive = false;
-               /* And reset buffer replacement strategy, too */
-               StrategyHintVacuum(false);
                PG_RE_THROW();
        }
        PG_END_TRY();
@@ -1084,21 +1094,13 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
         */
        toast_relid = onerel->rd_rel->reltoastrelid;
 
-       /*
-        * Tell the cache replacement strategy that vacuum is causing all
-        * following IO
-        */
-       StrategyHintVacuum(true);
-
        /*
         * Do the actual work --- either FULL or "lazy" vacuum
         */
        if (vacstmt->full)
                full_vacuum_rel(onerel, vacstmt);
        else
-               lazy_vacuum_rel(onerel, vacstmt);
-
-       StrategyHintVacuum(false);
+               lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
 
        /* all done with this class, but hold lock until commit */
        relation_close(onerel, NoLock);
@@ -1290,7 +1292,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
 
                vacuum_delay_point();
 
-               buf = ReadBuffer(onerel, blkno);
+               buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
                page = BufferGetPage(buf);
 
                /*
@@ -1730,7 +1732,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                /*
                 * Process this page of relation.
                 */
-               buf = ReadBuffer(onerel, blkno);
+               buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
                page = BufferGetPage(buf);
 
                vacpage->offsets_free = 0;
@@ -1954,8 +1956,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                                        nextTid = tp.t_data->t_ctid;
                                        priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
                                        /* assume block# is OK (see heap_fetch comments) */
-                                       nextBuf = ReadBuffer(onerel,
-                                                                                ItemPointerGetBlockNumber(&nextTid));
+                                       nextBuf = ReadBufferWithStrategy(onerel,
+                                                                                ItemPointerGetBlockNumber(&nextTid),
+                                                                                                        vac_strategy);
                                        nextPage = BufferGetPage(nextBuf);
                                        /* If bogus or unused slot, assume tp is end of chain */
                                        nextOffnum = ItemPointerGetOffsetNumber(&nextTid);
@@ -2091,8 +2094,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                                                break;  /* out of check-all-items loop */
                                        }
                                        tp.t_self = vtlp->this_tid;
-                                       Pbuf = ReadBuffer(onerel,
-                                                                       ItemPointerGetBlockNumber(&(tp.t_self)));
+                                       Pbuf = ReadBufferWithStrategy(onerel,
+                                                                       ItemPointerGetBlockNumber(&(tp.t_self)),
+                                                                                                 vac_strategy);
                                        Ppage = BufferGetPage(Pbuf);
                                        Pitemid = PageGetItemId(Ppage,
                                                                   ItemPointerGetOffsetNumber(&(tp.t_self)));
@@ -2174,11 +2178,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 
                                        /* Get page to move from */
                                        tuple.t_self = vtmove[ti].tid;
-                                       Cbuf = ReadBuffer(onerel,
-                                                                ItemPointerGetBlockNumber(&(tuple.t_self)));
+                                       Cbuf = ReadBufferWithStrategy(onerel,
+                                                                ItemPointerGetBlockNumber(&(tuple.t_self)),
+                                                                                                 vac_strategy);
 
                                        /* Get page to move to */
-                                       dst_buffer = ReadBuffer(onerel, destvacpage->blkno);
+                                       dst_buffer = ReadBufferWithStrategy(onerel,
+                                                                                                               destvacpage->blkno,
+                                                                                                               vac_strategy);
 
                                        LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
                                        if (dst_buffer != Cbuf)
@@ -2239,7 +2246,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                                if (i == num_fraged_pages)
                                        break;          /* can't move item anywhere */
                                dst_vacpage = fraged_pages->pagedesc[i];
-                               dst_buffer = ReadBuffer(onerel, dst_vacpage->blkno);
+                               dst_buffer = ReadBufferWithStrategy(onerel,
+                                                                                                       dst_vacpage->blkno,
+                                                                                                       vac_strategy);
                                LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
                                dst_page = BufferGetPage(dst_buffer);
                                /* if this page was not used before - clean it */
@@ -2386,7 +2395,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                        Page            page;
 
                        /* this page was not used as a move target, so must clean it */
-                       buf = ReadBuffer(onerel, (*curpage)->blkno);
+                       buf = ReadBufferWithStrategy(onerel,
+                                                                                (*curpage)->blkno,
+                                                                                vac_strategy);
                        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
                        page = BufferGetPage(buf);
                        if (!PageIsEmpty(page))
@@ -2470,7 +2481,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                        int                     uncnt;
                        int                     num_tuples = 0;
 
-                       buf = ReadBuffer(onerel, vacpage->blkno);
+                       buf = ReadBufferWithStrategy(onerel, vacpage->blkno, vac_strategy);
                        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
                        page = BufferGetPage(buf);
                        maxoff = PageGetMaxOffsetNumber(page);
@@ -2859,7 +2870,7 @@ update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
                        break;                          /* no need to scan any further */
                if ((*curpage)->offsets_used == 0)
                        continue;                       /* this page was never used as a move dest */
-               buf = ReadBuffer(rel, (*curpage)->blkno);
+               buf = ReadBufferWithStrategy(rel, (*curpage)->blkno, vac_strategy);
                LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
                page = BufferGetPage(buf);
                max_offset = PageGetMaxOffsetNumber(page);
@@ -2925,7 +2936,9 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 
                if ((*vacpage)->offsets_free > 0)
                {
-                       buf = ReadBuffer(onerel, (*vacpage)->blkno);
+                       buf = ReadBufferWithStrategy(onerel,
+                                                                                (*vacpage)->blkno,
+                                                                                vac_strategy);
                        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
                        vacuum_page(onerel, buf, *vacpage);
                        UnlockReleaseBuffer(buf);
@@ -3012,6 +3025,7 @@ scan_index(Relation indrel, double num_tuples)
        ivinfo.vacuum_full = true;
        ivinfo.message_level = elevel;
        ivinfo.num_heap_tuples = num_tuples;
+       ivinfo.strategy = vac_strategy;
 
        stats = index_vacuum_cleanup(&ivinfo, NULL);
 
@@ -3077,6 +3091,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
        ivinfo.vacuum_full = true;
        ivinfo.message_level = elevel;
        ivinfo.num_heap_tuples = num_tuples + keep_tuples;
+       ivinfo.strategy = vac_strategy;
 
        /* Do bulk deletion */
        stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist);
index 2c9a80540c39e1783c628a550fb3f1c6682c70fe..3ac097388b2ef42674a344c1c97fbdac089448a8 100644 (file)
@@ -36,7 +36,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.89 2007/05/17 15:28:29 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.90 2007/05/30 20:11:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -96,11 +96,14 @@ typedef struct LVRelStats
 } LVRelStats;
 
 
+/* A few variables that don't seem worth passing around as parameters */
 static int     elevel = -1;
 
 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;
 
+static BufferAccessStrategy vac_strategy;
+
 
 /* non-export function prototypes */
 static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
@@ -138,7 +141,8 @@ static int  vac_cmp_page_spaces(const void *left, const void *right);
  *             and locked the relation.
  */
 void
-lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
+lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
+                               BufferAccessStrategy bstrategy)
 {
        LVRelStats *vacrelstats;
        Relation   *Irel;
@@ -158,6 +162,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
        else
                elevel = DEBUG2;
 
+       vac_strategy = bstrategy;
+
        vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
                                                  &OldestXmin, &FreezeLimit);
 
@@ -318,7 +324,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
                        vacrelstats->num_index_scans++;
                }
 
-               buf = ReadBuffer(onerel, blkno);
+               buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
 
                /* Initially, we only need shared access to the buffer */
                LockBuffer(buf, BUFFER_LOCK_SHARE);
@@ -586,7 +592,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
                vacuum_delay_point();
 
                tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
-               buf = ReadBuffer(onerel, tblk);
+               buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
                LockBufferForCleanup(buf);
                tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
                /* Now that we've compacted the page, record its available space */
@@ -684,6 +690,7 @@ lazy_vacuum_index(Relation indrel,
        ivinfo.message_level = elevel;
        /* We don't yet know rel_tuples, so pass -1 */
        ivinfo.num_heap_tuples = -1;
+       ivinfo.strategy = vac_strategy;
 
        /* Do bulk deletion */
        *stats = index_bulk_delete(&ivinfo, *stats,
@@ -713,6 +720,7 @@ lazy_cleanup_index(Relation indrel,
        ivinfo.vacuum_full = false;
        ivinfo.message_level = elevel;
        ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
+       ivinfo.strategy = vac_strategy;
 
        stats = index_vacuum_cleanup(&ivinfo, stats);
 
@@ -869,7 +877,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 
                blkno--;
 
-               buf = ReadBuffer(onerel, blkno);
+               buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
 
                /* In this phase we only need shared access to the buffer */
                LockBuffer(buf, BUFFER_LOCK_SHARE);
index 4d73c6c3f592f557aeb5d361cfd96475ccc71e52..752af9983131214553689f8d1fab541d449d895c 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.46 2007/05/07 20:41:24 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.47 2007/05/30 20:11:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -218,7 +218,8 @@ static void relation_needs_vacanalyze(Oid relid, Form_pg_autovacuum avForm,
                                                  bool *doanalyze);
 
 static void autovacuum_do_vac_analyze(Oid relid, bool dovacuum,
-                                                 bool doanalyze, int freeze_min_age);
+                                                 bool doanalyze, int freeze_min_age,
+                                                 BufferAccessStrategy bstrategy);
 static HeapTuple get_pg_autovacuum_tuple_relid(Relation avRel, Oid relid);
 static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared,
                                                  PgStat_StatDBEntry *shared,
@@ -1673,6 +1674,7 @@ do_autovacuum(void)
        ListCell   *cell;
        PgStat_StatDBEntry *shared;
        PgStat_StatDBEntry *dbentry;
+       BufferAccessStrategy bstrategy;
 
        /*
         * may be NULL if we couldn't find an entry (only happens if we
@@ -1812,6 +1814,13 @@ do_autovacuum(void)
        list_free(toast_oids);
        toast_oids = NIL;
 
+       /*
+        * Create a buffer access strategy object for VACUUM to use.  We want
+        * to use the same one across all the vacuum operations we perform,
+        * since the point is for VACUUM not to blow out the shared cache.
+        */
+       bstrategy = GetAccessStrategy(BAS_VACUUM);
+
        /*
         * Perform operations on collected tables.
         */
@@ -1910,7 +1919,8 @@ next_worker:
                autovacuum_do_vac_analyze(tab->at_relid,
                                                                  tab->at_dovacuum,
                                                                  tab->at_doanalyze,
-                                                                 tab->at_freeze_min_age);
+                                                                 tab->at_freeze_min_age,
+                                                                 bstrategy);
                /* be tidy */
                pfree(tab);
        }
@@ -2328,7 +2338,8 @@ relation_needs_vacanalyze(Oid relid,
  */
 static void
 autovacuum_do_vac_analyze(Oid relid, bool dovacuum, bool doanalyze,
-                                                 int freeze_min_age)
+                                                 int freeze_min_age,
+                                                 BufferAccessStrategy bstrategy)
 {
        VacuumStmt      vacstmt;
        MemoryContext old_cxt;
@@ -2354,7 +2365,7 @@ autovacuum_do_vac_analyze(Oid relid, bool dovacuum, bool doanalyze,
        /* Let pgstat know what we're doing */
        autovac_report_activity(&vacstmt, relid);
 
-       vacuum(&vacstmt, list_make1_oid(relid), true);
+       vacuum(&vacstmt, list_make1_oid(relid), bstrategy, true);
        MemoryContextSwitchTo(old_cxt);
 }
 
index afdea2af747277f68e1ab9c7f607e295a01c4c2a..f6327f875e67e0ff8a2be008fdd710bc8593f718 100644 (file)
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.11 2006/07/23 03:07:58 tgl Exp $
+$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.12 2007/05/30 20:11:58 tgl Exp $
 
 Notes about shared buffer access rules
 --------------------------------------
@@ -152,20 +152,21 @@ we could use per-backend LWLocks instead (a buffer header would then contain
 a field to show which backend is doing its I/O).
 
 
-Buffer replacement strategy
----------------------------
+Normal buffer replacement strategy
+----------------------------------
 
 There is a "free list" of buffers that are prime candidates for replacement.
 In particular, buffers that are completely free (contain no valid page) are
-always in this list.  We may also throw buffers into this list if we
-consider their pages unlikely to be needed soon.  The list is singly-linked
-using fields in the buffer headers; we maintain head and tail pointers in
-global variables.  (Note: although the list links are in the buffer headers,
-they are considered to be protected by the BufFreelistLock, not the
-buffer-header spinlocks.)  To choose a victim buffer to recycle when there
-are no free buffers available, we use a simple clock-sweep algorithm, which
-avoids the need to take system-wide locks during common operations.  It
-works like this:
+always in this list.  We could also throw buffers into this list if we
+consider their pages unlikely to be needed soon; however, the current
+algorithm never does that.  The list is singly-linked using fields in the
+buffer headers; we maintain head and tail pointers in global variables.
+(Note: although the list links are in the buffer headers, they are
+considered to be protected by the BufFreelistLock, not the buffer-header
+spinlocks.)  To choose a victim buffer to recycle when there are no free
+buffers available, we use a simple clock-sweep algorithm, which avoids the
+need to take system-wide locks during common operations.  It works like
+this:
 
 Each buffer header contains a usage counter, which is incremented (up to a
 small limit value) whenever the buffer is unpinned.  (This requires only the
@@ -199,22 +200,40 @@ before we can recycle it; if someone else pins the buffer meanwhile we will
 have to give up and try another buffer.  This however is not a concern
 of the basic select-a-victim-buffer algorithm.)
 
-A special provision is that while running VACUUM, a backend does not
-increment the usage count on buffers it accesses.  In fact, if ReleaseBuffer
-sees that it is dropping the pin count to zero and the usage count is zero,
-then it appends the buffer to the tail of the free list.  (This implies that
-VACUUM, but only VACUUM, must take the BufFreelistLock during ReleaseBuffer;
-this shouldn't create much of a contention problem.)  This provision
-encourages VACUUM to work in a relatively small number of buffers rather
-than blowing out the entire buffer cache.  It is reasonable since a page
-that has been touched only by VACUUM is unlikely to be needed again soon.
-
-Since VACUUM usually requests many pages very fast, the effect of this is that
-it will get back the very buffers it filled and possibly modified on the next
-call and will therefore do its work in a few shared memory buffers, while
-being able to use whatever it finds in the cache already.  This also implies
-that most of the write traffic caused by a VACUUM will be done by the VACUUM
-itself and not pushed off onto other processes.
+
+Buffer ring replacement strategy
+---------------------------------
+
+When running a query that needs to access a large number of pages just once,
+such as VACUUM or a large sequential scan, a different strategy is used.
+A page that has been touched only by such a scan is unlikely to be needed
+again soon, so instead of running the normal clock sweep algorithm and
+blowing out the entire buffer cache, a small ring of buffers is allocated
+using the normal clock sweep algorithm and those buffers are reused for the
+whole scan.  This also implies that much of the write traffic caused by such
+a statement will be done by the backend itself and not pushed off onto other
+processes.
+
+For sequential scans, a 256KB ring is used. That's small enough to fit in L2
+cache, which makes transferring pages from OS cache to shared buffer cache
+efficient.  Even less would often be enough, but the ring must be big enough
+to accommodate all pages in the scan that are pinned concurrently.  256KB
+should also be enough to leave a small cache trail for other backends to
+join in a synchronized seq scan.  If a ring buffer is dirtied and its LSN
+updated, we would normally have to write and flush WAL before we could
+re-use the buffer; in this case we instead discard the buffer from the ring
+and (later) choose a replacement using the normal clock-sweep algorithm.
+Hence this strategy works best for scans that are read-only (or at worst
+update hint bits).  In a scan that modifies every page in the scan, like a
+bulk UPDATE or DELETE, the buffers in the ring will always be dirtied and
+the ring strategy effectively degrades to the normal strategy.
+
+VACUUM uses a 256KB ring like sequential scans, but dirty pages are not
+removed from the ring.  Instead, WAL is flushed if needed to allow reuse of
+the buffers.  Before introducing the buffer ring strategy in 8.3, VACUUM's
+buffers were sent to the freelist, which was effectively a buffer ring of 1
+buffer, resulting in excessive WAL flushing.  Allowing VACUUM to update
+256KB between WAL flushes should be more efficient.
 
 
 Background writer's processing
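
To make the two ring behaviors described above concrete, a toy standalone
model (the slot count and the dirtiness test are invented; the real logic
lives in freelist.c's StrategyGetBuffer and StrategyRejectBuffer):

#include <stdbool.h>
#include <stdio.h>

#define RING_SLOTS 4	/* stands in for 256KB / 8KB = 32 buffers */

typedef struct { int buf_id; bool valid; } RingSlot;

static RingSlot ring[RING_SLOTS];
static int ring_pos = 0;
static int next_buf = 100;	/* pretend ids handed out by the clock sweep */

/* Stand-in for "reusing this buffer would force a WAL flush" */
static bool
would_need_wal_flush(int buf_id)
{
	return buf_id % 2 == 0;
}

/*
 * On the first lap the ring fills from the shared pool; on later laps
 * slots are reused.  A bulk read drops a slot whose reuse would force a
 * WAL flush and takes a fresh buffer instead; VACUUM keeps the slot and
 * pays the flush (once per ring-ful rather than once per page).
 */
static int
ring_get_buffer(bool is_vacuum)
{
	RingSlot   *slot = &ring[ring_pos];

	ring_pos = (ring_pos + 1) % RING_SLOTS;

	if (!slot->valid)
	{
		slot->buf_id = next_buf++;	/* first lap: fill the ring */
		slot->valid = true;
	}
	else if (!is_vacuum && would_need_wal_flush(slot->buf_id))
		slot->buf_id = next_buf++;	/* bulk read: evict from ring */

	return slot->buf_id;
}

int
main(void)
{
	for (int i = 0; i < 12; i++)
		printf("%d ", ring_get_buffer(false));
	printf("\n");
	return 0;
}
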
index e2cfc870e2e9eeb72c05a9ed26d5fdc0f4083b6f..bbb6e0bc04afdcb027dd9a4e8ddbf6188e2d4e09 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.219 2007/05/27 03:50:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.220 2007/05/30 20:11:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -90,11 +90,11 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
 
 
 static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
-                                                               bool zeroPage);
-static bool PinBuffer(volatile BufferDesc *buf);
+                                                               bool zeroPage,
+                                                               BufferAccessStrategy strategy);
+static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
 static void PinBuffer_Locked(volatile BufferDesc *buf);
-static void UnpinBuffer(volatile BufferDesc *buf,
-                       bool fixOwner, bool normalAccess);
+static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
 static bool SyncOneBuffer(int buf_id, bool skip_pinned);
 static void WaitIO(volatile BufferDesc *buf);
 static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
@@ -102,7 +102,8 @@ static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
                                  int set_flag_bits);
 static void buffer_write_error_callback(void *arg);
 static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
-                       bool *foundPtr);
+                                                                               BufferAccessStrategy strategy,
+                                                                               bool *foundPtr);
 static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
 static void AtProcExit_Buffers(int code, Datum arg);
 
@@ -125,7 +126,18 @@ static void AtProcExit_Buffers(int code, Datum arg);
 Buffer
 ReadBuffer(Relation reln, BlockNumber blockNum)
 {
-       return ReadBuffer_common(reln, blockNum, false);
+       return ReadBuffer_common(reln, blockNum, false, NULL);
+}
+
+/*
+ * ReadBufferWithStrategy -- same as ReadBuffer, except caller can specify
+ *             a nondefault buffer access strategy.  See buffer/README for details.
+ */
+Buffer
+ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
+                                          BufferAccessStrategy strategy)
+{
+       return ReadBuffer_common(reln, blockNum, false, strategy);
 }
 
 /*
@@ -140,14 +152,15 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
 Buffer
 ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
 {
-       return ReadBuffer_common(reln, blockNum, true);
+       return ReadBuffer_common(reln, blockNum, true, NULL);
 }
 
 /*
- * ReadBuffer_common -- common logic for ReadBuffer and ReadOrZeroBuffer
+ * ReadBuffer_common -- common logic for ReadBuffer variants
  */
 static Buffer
-ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
+ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage,
+                                 BufferAccessStrategy strategy)
 {
        volatile BufferDesc *bufHdr;
        Block           bufBlock;
@@ -185,7 +198,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
                 * lookup the buffer.  IO_IN_PROGRESS is set if the requested block is
                 * not currently in memory.
                 */
-               bufHdr = BufferAlloc(reln, blockNum, &found);
+               bufHdr = BufferAlloc(reln, blockNum, strategy, &found);
                if (found)
                        BufferHitCount++;
        }
@@ -330,6 +343,10 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
  *             buffer.  If no buffer exists already, selects a replacement
  *             victim and evicts the old page, but does NOT read in new page.
  *
+ * "strategy" can be a buffer replacement strategy object, or NULL for
+ * the default strategy.  The selected buffer's usage_count is advanced when
+ * using the default strategy, but otherwise possibly not (see PinBuffer).
+ *
  * The returned buffer is pinned and is already marked as holding the
  * desired page.  If it already did have the desired page, *foundPtr is
  * set TRUE.  Otherwise, *foundPtr is set FALSE and the buffer is marked
@@ -343,6 +360,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
 static volatile BufferDesc *
 BufferAlloc(Relation reln,
                        BlockNumber blockNum,
+                       BufferAccessStrategy strategy,
                        bool *foundPtr)
 {
        BufferTag       newTag;                 /* identity of requested block */
@@ -375,7 +393,7 @@ BufferAlloc(Relation reln,
                 */
                buf = &BufferDescriptors[buf_id];
 
-               valid = PinBuffer(buf);
+               valid = PinBuffer(buf, strategy);
 
                /* Can release the mapping lock as soon as we've pinned it */
                LWLockRelease(newPartitionLock);
@@ -413,13 +431,15 @@ BufferAlloc(Relation reln,
        /* Loop here in case we have to try another victim buffer */
        for (;;)
        {
+               bool lock_held;
+
                /*
                 * Select a victim buffer.      The buffer is returned with its header
-                * spinlock still held!  Also the BufFreelistLock is still held, since
-                * it would be bad to hold the spinlock while possibly waking up other
-                * processes.
+                * spinlock still held!  Also (in most cases) the BufFreelistLock is
+                * still held, since it would be bad to hold the spinlock while
+                * possibly waking up other processes.
                 */
-               buf = StrategyGetBuffer();
+               buf = StrategyGetBuffer(strategy, &lock_held);
 
                Assert(buf->refcount == 0);
 
@@ -430,7 +450,8 @@ BufferAlloc(Relation reln,
                PinBuffer_Locked(buf);
 
                /* Now it's safe to release the freelist lock */
-               LWLockRelease(BufFreelistLock);
+               if (lock_held)
+                       LWLockRelease(BufFreelistLock);
 
                /*
                 * If the buffer was dirty, try to write it out.  There is a race
@@ -458,16 +479,34 @@ BufferAlloc(Relation reln,
                         */
                        if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED))
                        {
+                               /*
+                                * If using a nondefault strategy, and writing the buffer
+                                * would require a WAL flush, let the strategy decide whether
+                                * to go ahead and write/reuse the buffer or to choose another
+                                * victim.  We need lock to inspect the page LSN, so this
+                                * can't be done inside StrategyGetBuffer.
+                                */
+                               if (strategy != NULL &&
+                                       XLogNeedsFlush(BufferGetLSN(buf)) &&
+                                       StrategyRejectBuffer(strategy, buf))
+                               {
+                                       /* Drop lock/pin and loop around for another buffer */
+                                       LWLockRelease(buf->content_lock);
+                                       UnpinBuffer(buf, true);
+                                       continue;
+                               }
+
+                               /* OK, do the I/O */
                                FlushBuffer(buf, NULL);
                                LWLockRelease(buf->content_lock);
                        }
                        else
                        {
                                /*
-                                * Someone else has pinned the buffer, so give it up and loop
+                                * Someone else has locked the buffer, so give it up and loop
                                 * back to get another one.
                                 */
-                               UnpinBuffer(buf, true, false /* evidently recently used */ );
+                               UnpinBuffer(buf, true);
                                continue;
                        }
                }
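
For context, the rejection check above relies on the new XLogNeedsFlush() declared in xlog.h further down.  A hedged sketch of the semantics assumed here, not the actual implementation (the flushed-LSN argument is a hypothetical stand-in for the shared flush pointer; XLByteLT is the era's LSN comparison macro):

	/*
	 * Illustrative only: XLogNeedsFlush(lsn) is taken to report whether WAL
	 * up to "lsn" has not yet reached disk, i.e. whether writing out a page
	 * bearing that LSN would force a WAL flush first.
	 */
	static bool
	XLogNeedsFlushSketch(XLogRecPtr recptr, XLogRecPtr flushedUpTo)
	{
		return XLByteLT(flushedUpTo, recptr);
	}
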
@@ -531,10 +570,9 @@ BufferAlloc(Relation reln,
                         * Got a collision. Someone has already done what we were about to
                         * do. We'll just handle this as if it were found in the buffer
                         * pool in the first place.  First, give up the buffer we were
-                        * planning to use.  Don't allow it to be thrown in the free list
-                        * (we don't want to hold freelist and mapping locks at once).
+                        * planning to use.
                         */
-                       UnpinBuffer(buf, true, false);
+                       UnpinBuffer(buf, true);
 
                        /* Can give up that buffer's mapping partition lock now */
                        if ((oldFlags & BM_TAG_VALID) &&
@@ -545,7 +583,7 @@ BufferAlloc(Relation reln,
 
                        buf = &BufferDescriptors[buf_id];
 
-                       valid = PinBuffer(buf);
+                       valid = PinBuffer(buf, strategy);
 
                        /* Can release the mapping lock as soon as we've pinned it */
                        LWLockRelease(newPartitionLock);
@@ -595,20 +633,21 @@ BufferAlloc(Relation reln,
                        oldPartitionLock != newPartitionLock)
                        LWLockRelease(oldPartitionLock);
                LWLockRelease(newPartitionLock);
-               UnpinBuffer(buf, true, false /* evidently recently used */ );
+               UnpinBuffer(buf, true);
        }
 
        /*
         * Okay, it's finally safe to rename the buffer.
         *
         * Clearing BM_VALID here is necessary, clearing the dirtybits is just
-        * paranoia.  We also clear the usage_count since any recency of use of
-        * the old content is no longer relevant.
+        * paranoia.  We also reset the usage_count since any recency of use of
+        * the old content is no longer relevant.  (The usage_count starts out
+        * at 1 so that the buffer can survive one clock-sweep pass.)
         */
        buf->tag = newTag;
        buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
        buf->flags |= BM_TAG_VALID;
-       buf->usage_count = 0;
+       buf->usage_count = 1;
 
        UnlockBufHdr(buf);
 
@@ -736,7 +775,7 @@ retry:
        /*
         * Insert the buffer at the head of the list of free buffers.
         */
-       StrategyFreeBuffer(buf, true);
+       StrategyFreeBuffer(buf);
 }
 
 /*
@@ -814,9 +853,6 @@ ReleaseAndReadBuffer(Buffer buffer,
                                return buffer;
                        ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
                        LocalRefCount[-buffer - 1]--;
-                       if (LocalRefCount[-buffer - 1] == 0 &&
-                               bufHdr->usage_count < BM_MAX_USAGE_COUNT)
-                               bufHdr->usage_count++;
                }
                else
                {
@@ -826,7 +862,7 @@ ReleaseAndReadBuffer(Buffer buffer,
                        if (bufHdr->tag.blockNum == blockNum &&
                                RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
                                return buffer;
-                       UnpinBuffer(bufHdr, true, true);
+                       UnpinBuffer(bufHdr, true);
                }
        }
 
@@ -836,6 +872,14 @@ ReleaseAndReadBuffer(Buffer buffer,
 /*
  * PinBuffer -- make buffer unavailable for replacement.
  *
+ * For the default access strategy, the buffer's usage_count is incremented
+ * when we first pin it; for other strategies we just make sure the usage_count
+ * isn't zero.  (The idea of the latter is that we don't want synchronized
+ * heap scans to inflate the count, but the count must be nonzero to
+ * discourage other backends from stealing buffers out of our ring.  As long
+ * as we cycle through the ring faster than the global clock sweep does,
+ * buffers in our ring won't be chosen as victims for replacement by other
+ * backends.)
+ *
  * This should be applied only to shared buffers, never local ones.
  *
  * Note that ResourceOwnerEnlargeBuffers must have been done already.
@@ -844,7 +888,7 @@ ReleaseAndReadBuffer(Buffer buffer,
  * some callers to avoid an extra spinlock cycle.
  */
 static bool
-PinBuffer(volatile BufferDesc *buf)
+PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
 {
        int                     b = buf->buf_id;
        bool            result;
@@ -853,6 +897,16 @@ PinBuffer(volatile BufferDesc *buf)
        {
                LockBufHdr(buf);
                buf->refcount++;
+               if (strategy == NULL)
+               {
+                       if (buf->usage_count < BM_MAX_USAGE_COUNT)
+                               buf->usage_count++;
+               }
+               else
+               {
+                       if (buf->usage_count == 0)
+                               buf->usage_count = 1;
+               }
                result = (buf->flags & BM_VALID) != 0;
                UnlockBufHdr(buf);
        }
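
To see the policy difference concretely, here is a toy, standalone illustration (not PostgreSQL code) of how the two branches above accumulate usage_count over repeated pins, assuming the stock BM_MAX_USAGE_COUNT of 5:

	#include <stdio.h>

	#define BM_MAX_USAGE_COUNT 5	/* assumed to match buf_internals.h */

	/* mirrors the two strategy branches in PinBuffer above */
	static int
	pin_bump(int usage_count, int have_strategy)
	{
		if (!have_strategy)
			return (usage_count < BM_MAX_USAGE_COUNT) ? usage_count + 1 : usage_count;
		return (usage_count == 0) ? 1 : usage_count;
	}

	int
	main(void)
	{
		int			u,
					i;

		for (u = 0, i = 0; i < 8; i++)
			u = pin_bump(u, 0);
		printf("default strategy, 8 pins: usage_count = %d\n", u);	/* 5 */

		for (u = 0, i = 0; i < 8; i++)
			u = pin_bump(u, 1);
		printf("ring strategy, 8 pins:    usage_count = %d\n", u);	/* 1 */
		return 0;
	}
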
@@ -872,6 +926,11 @@ PinBuffer(volatile BufferDesc *buf)
  * PinBuffer_Locked -- as above, but caller already locked the buffer header.
  * The spinlock is released before return.
  *
+ * Currently, no callers of this function want to modify the buffer's
+ * usage_count at all, so there's no need for a strategy parameter.
+ * Also, we don't bother with a BM_VALID test (the caller could check that
+ * for itself).
+ *
  * Note: use of this routine is frequently mandatory, not just an optimization
  * to save a spin lock/unlock cycle, because we need to pin a buffer before
  * its state can change under us.
@@ -897,17 +956,9 @@ PinBuffer_Locked(volatile BufferDesc *buf)
  *
  * Most but not all callers want CurrentResourceOwner to be adjusted.
  * Those that don't should pass fixOwner = FALSE.
- *
- * normalAccess indicates that we are finishing a "normal" page access,
- * that is, one requested by something outside the buffer subsystem.
- * Passing FALSE means it's an internal access that should not update the
- * buffer's usage count nor cause a change in the freelist.
- *
- * If we are releasing a buffer during VACUUM, and it's not been otherwise
- * used recently, and normalAccess is true, we send the buffer to the freelist.
  */
 static void
-UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
+UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
 {
        int                     b = buf->buf_id;
 
@@ -919,8 +970,6 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
        PrivateRefCount[b]--;
        if (PrivateRefCount[b] == 0)
        {
-               bool            immed_free_buffer = false;
-
                /* I'd better not still hold any locks on the buffer */
                Assert(!LWLockHeldByMe(buf->content_lock));
                Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
@@ -931,22 +980,7 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
                Assert(buf->refcount > 0);
                buf->refcount--;
 
-               /* Update buffer usage info, unless this is an internal access */
-               if (normalAccess)
-               {
-                       if (!strategy_hint_vacuum)
-                       {
-                               if (buf->usage_count < BM_MAX_USAGE_COUNT)
-                                       buf->usage_count++;
-                       }
-                       else
-                       {
-                               /* VACUUM accesses don't bump usage count, instead... */
-                               if (buf->refcount == 0 && buf->usage_count == 0)
-                                       immed_free_buffer = true;
-                       }
-               }
-
+               /* Support LockBufferForCleanup() */
                if ((buf->flags & BM_PIN_COUNT_WAITER) &&
                        buf->refcount == 1)
                {
@@ -959,14 +993,6 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
                }
                else
                        UnlockBufHdr(buf);
-
-               /*
-                * If VACUUM is releasing an otherwise-unused buffer, send it to the
-                * freelist for near-term reuse.  We put it at the tail so that it
-                * won't be used before any invalid buffers that may exist.
-                */
-               if (immed_free_buffer)
-                       StrategyFreeBuffer(buf, false);
        }
 }
 
@@ -1150,7 +1176,7 @@ SyncOneBuffer(int buf_id, bool skip_pinned)
        FlushBuffer(bufHdr, NULL);
 
        LWLockRelease(bufHdr->content_lock);
-       UnpinBuffer(bufHdr, true, false /* don't change freelist */ );
+       UnpinBuffer(bufHdr, true);
 
        return true;
 }
@@ -1266,7 +1292,7 @@ AtProcExit_Buffers(int code, Datum arg)
                         * here, it suggests that ResourceOwners are messed up.
                         */
                        PrivateRefCount[i] = 1;         /* make sure we release shared pin */
-                       UnpinBuffer(buf, false, false /* don't change freelist */ );
+                       UnpinBuffer(buf, false);
                        Assert(PrivateRefCount[i] == 0);
                }
        }
@@ -1700,7 +1726,7 @@ FlushRelationBuffers(Relation rel)
                        LWLockAcquire(bufHdr->content_lock, LW_SHARED);
                        FlushBuffer(bufHdr, rel->rd_smgr);
                        LWLockRelease(bufHdr->content_lock);
-                       UnpinBuffer(bufHdr, true, false /* no freelist change */ );
+                       UnpinBuffer(bufHdr, true);
                }
                else
                        UnlockBufHdr(bufHdr);
@@ -1723,11 +1749,7 @@ ReleaseBuffer(Buffer buffer)
        if (BufferIsLocal(buffer))
        {
                Assert(LocalRefCount[-buffer - 1] > 0);
-               bufHdr = &LocalBufferDescriptors[-buffer - 1];
                LocalRefCount[-buffer - 1]--;
-               if (LocalRefCount[-buffer - 1] == 0 &&
-                       bufHdr->usage_count < BM_MAX_USAGE_COUNT)
-                       bufHdr->usage_count++;
                return;
        }
 
@@ -1738,7 +1760,7 @@ ReleaseBuffer(Buffer buffer)
        if (PrivateRefCount[buffer - 1] > 1)
                PrivateRefCount[buffer - 1]--;
        else
-               UnpinBuffer(bufHdr, false, true);
+               UnpinBuffer(bufHdr, false);
 }
 
 /*
index a8c3aa2dcdc70c287478972a49825fde1dfc7bc9..d8eec0f8231110f5bf3cc3108487b5a7312bbb61 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.58 2007/01/05 22:19:37 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.59 2007/05/30 20:11:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -39,8 +39,42 @@ typedef struct
 /* Pointers to shared state */
 static BufferStrategyControl *StrategyControl = NULL;
 
-/* Backend-local state about whether currently vacuuming */
-bool           strategy_hint_vacuum = false;
+/*
+ * Private (non-shared) state for managing a ring of shared buffers to re-use.
+ * This is currently the only kind of BufferAccessStrategy object, but someday
+ * we might have more kinds.
+ */
+typedef struct BufferAccessStrategyData
+{
+       /* Overall strategy type */
+       BufferAccessStrategyType btype;
+       /* Number of elements in buffers[] array */
+       int                     ring_size;
+       /*
+        * Index of the "current" slot in the ring, ie, the one most recently
+        * returned by GetBufferFromRing.
+        */
+       int                     current;
+       /*
+        * True if the buffer just returned by StrategyGetBuffer had been in
+        * the ring already.
+        */
+       bool            current_was_in_ring;
+
+       /*
+        * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates
+        * we have not yet selected a buffer for this ring slot.  For allocation
+        * simplicity this is palloc'd together with the fixed fields of the
+        * struct.
+        */
+       Buffer          buffers[1];                             /* VARIABLE SIZE ARRAY */
+} BufferAccessStrategyData;
+
+
+/* Prototypes for internal functions */
+static volatile BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy);
+static void AddBufferToRing(BufferAccessStrategy strategy,
+                                                       volatile BufferDesc *buf);
 
 
 /*
@@ -50,17 +84,38 @@ bool                strategy_hint_vacuum = false;
  *     BufferAlloc(). The only hard requirement BufferAlloc() has is that
  *     the selected buffer must not currently be pinned by anyone.
  *
+ *     strategy is a BufferAccessStrategy object, or NULL for default strategy.
+ *
  *     To ensure that no one else can pin the buffer before we do, we must
- *     return the buffer with the buffer header spinlock still held.  That
- *     means that we return with the BufFreelistLock still held, as well;
- *     the caller must release that lock once the spinlock is dropped.
+ *     return the buffer with the buffer header spinlock still held.  If
+ *     *lock_held is set on exit, we have returned with the BufFreelistLock
+ *     still held, as well; the caller must release that lock once the spinlock
+ *     is dropped.  We do it that way because releasing the BufFreelistLock
+ *     might awaken other processes, and it would be bad to do the associated
+ *     kernel calls while holding the buffer header spinlock.
  */
 volatile BufferDesc *
-StrategyGetBuffer(void)
+StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 {
        volatile BufferDesc *buf;
        int                     trycounter;
 
+       /*
+        * If given a strategy object, see whether it can select a buffer.
+        * We assume strategy objects don't need the BufFreelistLock.
+        */
+       if (strategy != NULL)
+       {
+               buf = GetBufferFromRing(strategy);
+               if (buf != NULL)
+               {
+                       *lock_held = false;
+                       return buf;
+               }
+       }
+
+       /* Nope, so lock the freelist */
+       *lock_held = true;
        LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
 
        /*
@@ -82,11 +137,16 @@ StrategyGetBuffer(void)
                 * If the buffer is pinned or has a nonzero usage_count, we cannot use
                 * it; discard it and retry.  (This can only happen if VACUUM put a
                 * valid buffer in the freelist and then someone else used it before
-                * we got to it.)
+                * we got to it.  It's probably impossible altogether as of 8.3,
+                * but we'd better check anyway.)
                 */
                LockBufHdr(buf);
                if (buf->refcount == 0 && buf->usage_count == 0)
+               {
+                       if (strategy != NULL)
+                               AddBufferToRing(strategy, buf);
                        return buf;
+               }
                UnlockBufHdr(buf);
        }
 
@@ -101,15 +161,23 @@ StrategyGetBuffer(void)
 
                /*
                 * If the buffer is pinned or has a nonzero usage_count, we cannot use
-                * it; decrement the usage_count and keep scanning.
+                * it; decrement the usage_count (unless pinned) and keep scanning.
                 */
                LockBufHdr(buf);
-               if (buf->refcount == 0 && buf->usage_count == 0)
-                       return buf;
-               if (buf->usage_count > 0)
+               if (buf->refcount == 0)
                {
-                       buf->usage_count--;
-                       trycounter = NBuffers;
+                       if (buf->usage_count > 0)
+                       {
+                               buf->usage_count--;
+                               trycounter = NBuffers;
+                       }
+                       else
+                       {
+                               /* Found a usable buffer */
+                               if (strategy != NULL)
+                                       AddBufferToRing(strategy, buf);
+                               return buf;
+                       }
                }
                else if (--trycounter == 0)
                {
@@ -132,13 +200,9 @@ StrategyGetBuffer(void)
 
 /*
  * StrategyFreeBuffer: put a buffer on the freelist
- *
- * The buffer is added either at the head or the tail, according to the
- * at_head parameter.  This allows a small amount of control over how
- * quickly the buffer is reused.
  */
 void
-StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head)
+StrategyFreeBuffer(volatile BufferDesc *buf)
 {
        LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
 
@@ -148,22 +212,10 @@ StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head)
         */
        if (buf->freeNext == FREENEXT_NOT_IN_LIST)
        {
-               if (at_head)
-               {
-                       buf->freeNext = StrategyControl->firstFreeBuffer;
-                       if (buf->freeNext < 0)
-                               StrategyControl->lastFreeBuffer = buf->buf_id;
-                       StrategyControl->firstFreeBuffer = buf->buf_id;
-               }
-               else
-               {
-                       buf->freeNext = FREENEXT_END_OF_LIST;
-                       if (StrategyControl->firstFreeBuffer < 0)
-                               StrategyControl->firstFreeBuffer = buf->buf_id;
-                       else
-                               BufferDescriptors[StrategyControl->lastFreeBuffer].freeNext = buf->buf_id;
+               buf->freeNext = StrategyControl->firstFreeBuffer;
+               if (buf->freeNext < 0)
                        StrategyControl->lastFreeBuffer = buf->buf_id;
-               }
+               StrategyControl->firstFreeBuffer = buf->buf_id;
        }
 
        LWLockRelease(BufFreelistLock);
@@ -190,15 +242,6 @@ StrategySyncStart(void)
        return result;
 }
 
-/*
- * StrategyHintVacuum -- tell us whether VACUUM is active
- */
-void
-StrategyHintVacuum(bool vacuum_active)
-{
-       strategy_hint_vacuum = vacuum_active;
-}
-
 
 /*
  * StrategyShmemSize
@@ -274,3 +317,172 @@ StrategyInitialize(bool init)
        else
                Assert(!init);
 }
+
+
+/* ----------------------------------------------------------------
+ *                             Backend-private buffer ring management
+ * ----------------------------------------------------------------
+ */
+
+
+/*
+ * GetAccessStrategy -- create a BufferAccessStrategy object
+ *
+ * The object is allocated in the current memory context.
+ */
+BufferAccessStrategy
+GetAccessStrategy(BufferAccessStrategyType btype)
+{
+       BufferAccessStrategy strategy;
+       int             ring_size;
+
+       /*
+        * Select ring size to use.  See buffer/README for rationales.
+        * (Currently all cases are the same size, but keep this code
+        * structure for flexibility.)
+        */
+       switch (btype)
+       {
+               case BAS_NORMAL:
+                       /* if someone asks for NORMAL, just give 'em a "default" object */
+                       return NULL;
+
+               case BAS_BULKREAD:
+                       ring_size = 256 * 1024 / BLCKSZ;
+                       break;
+               case BAS_VACUUM:
+                       ring_size = 256 * 1024 / BLCKSZ;
+                       break;
+
+               default:
+                       elog(ERROR, "unrecognized buffer access strategy: %d",
+                                (int) btype);
+                       return NULL;            /* keep compiler quiet */
+       }
+
+       /* Make sure ring isn't an undue fraction of shared buffers */
+       ring_size = Min(NBuffers / 8, ring_size);
+
+       /* Allocate the object and initialize all elements to zeroes */
+       strategy = (BufferAccessStrategy)
+               palloc0(offsetof(BufferAccessStrategyData, buffers) +
+                               ring_size * sizeof(Buffer));
+
+       /* Set fields that don't start out zero */
+       strategy->btype = btype;
+       strategy->ring_size = ring_size;
+
+       return strategy;
+}
+
+/*
+ * FreeAccessStrategy -- release a BufferAccessStrategy object
+ *
+ * A simple pfree would do at the moment, but we would prefer that callers
+ * don't assume that much about the representation of BufferAccessStrategy.
+ */
+void
+FreeAccessStrategy(BufferAccessStrategy strategy)
+{
+       /* don't crash if called on a "default" strategy */
+       if (strategy != NULL)
+               pfree(strategy);
+}
+
+/*
+ * GetBufferFromRing -- returns a usable buffer from the ring, or NULL to
+ *             tell the caller to fall back to the normal allocation path.
+ *
+ * The bufhdr spin lock is held on the returned buffer.
+ */
+static volatile BufferDesc *
+GetBufferFromRing(BufferAccessStrategy strategy)
+{
+       volatile BufferDesc *buf;
+       Buffer          bufnum;
+
+       /* Advance to next ring slot */
+       if (++strategy->current >= strategy->ring_size)
+               strategy->current = 0;
+
+       /*
+        * If the slot hasn't been filled yet, tell the caller to allocate
+        * a new buffer with the normal allocation strategy.  He will then
+        * fill this slot by calling AddBufferToRing with the new buffer.
+        */
+       bufnum = strategy->buffers[strategy->current];
+       if (bufnum == InvalidBuffer)
+       {
+               strategy->current_was_in_ring = false;
+               return NULL;
+       }
+
+       /*
+        * If the buffer is pinned we cannot use it under any circumstances.
+        *
+        * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
+        * since our own previous usage of the ring element would have left it
+        * there, but it might've been decremented by clock sweep since then).
+        * A higher usage_count indicates someone else has touched the buffer,
+        * so we shouldn't re-use it.
+        */
+       buf = &BufferDescriptors[bufnum - 1];
+       LockBufHdr(buf);
+       if (buf->refcount == 0 && buf->usage_count <= 1)
+       {
+               strategy->current_was_in_ring = true;
+               return buf;
+       }
+       UnlockBufHdr(buf);
+
+       /*
+        * Tell caller to allocate a new buffer with the normal allocation
+        * strategy.  He'll then replace this ring element via AddBufferToRing.
+        */
+       strategy->current_was_in_ring = false;
+       return NULL;
+}
+
+/*
+ * AddBufferToRing -- add a buffer to the buffer ring
+ *
+ * Caller must hold the buffer header spinlock on the buffer.  Since this
+ * is called with the spinlock held, it had better be quite cheap.
+ */
+static void
+AddBufferToRing(BufferAccessStrategy strategy, volatile BufferDesc *buf)
+{
+       strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
+}
+
+/*
+ * StrategyRejectBuffer -- consider rejecting a dirty buffer
+ *
+ * When a nondefault strategy is used, the buffer manager calls this function
+ * when it turns out that the buffer selected by StrategyGetBuffer needs to
+ * be written out and doing so would require flushing WAL too.  This gives us
+ * a chance to choose a different victim.
+ *
+ * Returns true if buffer manager should ask for a new victim, and false
+ * if this buffer should be written and re-used.
+ */
+bool
+StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)
+{
+       /* We only do this in bulkread mode */
+       if (strategy->btype != BAS_BULKREAD)
+               return false;
+
+       /* Don't muck with behavior of normal buffer-replacement strategy */
+       if (!strategy->current_was_in_ring ||
+               strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
+               return false;
+
+       /*
+        * Remove the dirty buffer from the ring; this is necessary to prevent
+        * an infinite loop if all ring members are dirty.
+        */
+       strategy->buffers[strategy->current] = InvalidBuffer;
+
+       return true;
+}
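
As a worked example of the sizing rule in GetAccessStrategy above: with the default BLCKSZ of 8192, both ring types come out to 256 kB / 8 kB = 32 buffers, and the Min(NBuffers / 8, ...) clamp only bites when the shared arena holds fewer than 256 buffers.  A standalone sketch under those assumptions:

	#include <stdio.h>

	#define BLCKSZ 8192				/* default PostgreSQL block size */
	#define Min(x, y) ((x) < (y) ? (x) : (y))

	int
	main(void)
	{
		int			NBuffers = 1024;	/* a hypothetical small arena */
		int			ring_size = 256 * 1024 / BLCKSZ;	/* 32 */

		ring_size = Min(NBuffers / 8, ring_size);	/* 1024/8 = 128, so still 32 */
		printf("ring_size = %d buffers\n", ring_size);
		return 0;
	}
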
index 306ffe457694e4ee31f25693e74633c090346947..ad2bcf8dac6fa520bd6cb8a333fe748001990619 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.76 2007/01/05 22:19:37 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.77 2007/05/30 20:11:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,7 +57,8 @@ static Block GetLocalBufferStorage(void);
  *
  * API is similar to bufmgr.c's BufferAlloc, except that we do not need
  * to do any locking since this is all local.  Also, IO_IN_PROGRESS
- * does not get set.
+ * does not get set.  Lastly, we support only default access strategy
+ * (hence, usage_count is always advanced).
  */
 BufferDesc *
 LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
@@ -88,7 +89,12 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
                fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
                                RelationGetRelid(reln), blockNum, -b - 1);
 #endif
-
+               /* this part is equivalent to PinBuffer for a shared buffer */
+               if (LocalRefCount[b] == 0)
+               {
+                       if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
+                               bufHdr->usage_count++;
+               }
                LocalRefCount[b]++;
                ResourceOwnerRememberBuffer(CurrentResourceOwner,
                                                                        BufferDescriptorGetBuffer(bufHdr));
@@ -121,18 +127,21 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 
                bufHdr = &LocalBufferDescriptors[b];
 
-               if (LocalRefCount[b] == 0 && bufHdr->usage_count == 0)
-               {
-                       LocalRefCount[b]++;
-                       ResourceOwnerRememberBuffer(CurrentResourceOwner,
-                                                                               BufferDescriptorGetBuffer(bufHdr));
-                       break;
-               }
-
-               if (bufHdr->usage_count > 0)
+               if (LocalRefCount[b] == 0)
                {
-                       bufHdr->usage_count--;
-                       trycounter = NLocBuffer;
+                       if (bufHdr->usage_count > 0)
+                       {
+                               bufHdr->usage_count--;
+                               trycounter = NLocBuffer;
+                       }
+                       else
+                       {
+                               /* Found a usable buffer */
+                               LocalRefCount[b]++;
+                               ResourceOwnerRememberBuffer(CurrentResourceOwner,
+                                                                                       BufferDescriptorGetBuffer(bufHdr));
+                               break;
+                       }
                }
                else if (--trycounter == 0)
                        ereport(ERROR,
@@ -199,7 +208,7 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
        bufHdr->tag = newTag;
        bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
        bufHdr->flags |= BM_TAG_VALID;
-       bufHdr->usage_count = 0;
+       bufHdr->usage_count = 1;
 
        *foundPtr = FALSE;
        return bufHdr;
index 028cb47c7ac81b455511679eac2e6b7197809438..baa203a2d1986626810c0a398826ab97476ea849 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.279 2007/04/27 22:05:49 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.280 2007/05/30 20:12:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -931,7 +931,7 @@ ProcessUtility(Node *parsetree,
                        break;
 
                case T_VacuumStmt:
-                       vacuum((VacuumStmt *) parsetree, NIL, isTopLevel);
+                       vacuum((VacuumStmt *) parsetree, NIL, NULL, isTopLevel);
                        break;
 
                case T_ExplainStmt:
index 1f31baf0e4529ab6eea4ce25983c4f0c8224c5a4..98296e62bef0c46f90faad1edabed5cb05bde95c 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.66 2007/01/05 22:19:50 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.67 2007/05/30 20:12:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,6 +40,7 @@ typedef struct IndexVacuumInfo
        bool            vacuum_full;    /* VACUUM FULL (we have exclusive lock) */
        int                     message_level;  /* ereport level for progress messages */
        double          num_heap_tuples;        /* tuples remaining in heap */
+       BufferAccessStrategy strategy;  /* access strategy for reads */
 } IndexVacuumInfo;
 
 /*
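
With the new strategy member, an index AM's vacuum-driven fetches can be routed through the caller's ring.  A minimal hypothetical helper (the wrapper name is illustrative; ReadBufferWithStrategy is the real entry point added to bufmgr.h below):

	/* hypothetical: read an index page under the vacuum's buffer ring */
	static Buffer
	index_vacuum_getbuf(Relation rel, BlockNumber blkno, IndexVacuumInfo *info)
	{
		return ReadBufferWithStrategy(rel, blkno, info->strategy);
	}
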
index d382ee6ee91971f9a199afafc34d8a72dbd0b498..2bd314a8aa3acde8b6fd7c9cf61bb96769422a7b 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.80 2007/05/03 16:45:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.81 2007/05/30 20:12:02 tgl Exp $
  *
  * NOTES
  *             modeled after Margo Seltzer's hash implementation for unix.
@@ -273,11 +273,13 @@ extern void _hash_doinsert(Relation rel, IndexTuple itup);
 
 /* hashovfl.c */
 extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf);
-extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf);
+extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf,
+                                                                         BufferAccessStrategy bstrategy);
 extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
                                 BlockNumber blkno);
 extern void _hash_squeezebucket(Relation rel,
-                                       Bucket bucket, BlockNumber bucket_blkno);
+                                                               Bucket bucket, BlockNumber bucket_blkno,
+                                                               BufferAccessStrategy bstrategy);
 
 /* hashpage.c */
 extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access);
@@ -287,6 +289,9 @@ extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
                                                   int access, int flags);
 extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
 extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno);
+extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
+                                                                                int access, int flags,
+                                                                                BufferAccessStrategy bstrategy);
 extern void _hash_relbuf(Relation rel, Buffer buf);
 extern void _hash_dropbuf(Relation rel, Buffer buf);
 extern void _hash_wrtbuf(Relation rel, Buffer buf);
index 7a1ea39352ade3e95692372b55589b73d8fde517..200b45713e737ad01f74974a041e26df380057e6 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.53 2007/05/27 03:50:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.54 2007/05/30 20:12:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,6 +27,7 @@ typedef struct HeapScanDescData
        int                     rs_nkeys;               /* number of scan keys */
        ScanKey         rs_key;                 /* array of scan key descriptors */
        BlockNumber rs_nblocks;         /* number of blocks to scan */
+       BufferAccessStrategy rs_strategy;       /* access strategy for reads */
        bool            rs_pageatatime; /* verify visibility page-at-a-time? */
 
        /* scan current state */
index 87ff6aba5018794a1e185ececa7f9de93ce23ef4..1c741f38fd0435f6b13dabfa40851f5c31ba37b0 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.77 2007/05/20 21:08:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.78 2007/05/30 20:12:02 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@@ -159,6 +159,7 @@ extern bool XLOG_DEBUG;
 
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
 extern void XLogFlush(XLogRecPtr RecPtr);
+extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
 
 extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
 extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);
index acb2f623e2729ac4f392c5390af45467b643410a..50a475bc5e3255ae65bd24892bdc971ff53afc6c 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.71 2007/05/17 15:28:29 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.72 2007/05/30 20:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
 #include "nodes/parsenodes.h"
+#include "storage/buf.h"
 #include "storage/lock.h"
 #include "utils/rel.h"
 
+
 /*----------
  * ANALYZE builds one of these structs for each attribute (column) that is
  * to be analyzed.     The struct and subsidiary data are in anl_context,
@@ -110,7 +112,8 @@ extern int  vacuum_freeze_min_age;
 
 
 /* in commands/vacuum.c */
-extern void vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel);
+extern void vacuum(VacuumStmt *vacstmt, List *relids,
+                                  BufferAccessStrategy bstrategy, bool isTopLevel);
 extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
                                 int *nindexes, Relation **Irel);
 extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
@@ -127,9 +130,11 @@ extern bool vac_is_partial_index(Relation indrel);
 extern void vacuum_delay_point(void);
 
 /* in commands/vacuumlazy.c */
-extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
+extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
+                                                       BufferAccessStrategy bstrategy);
 
 /* in commands/analyze.c */
-extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
+                                               BufferAccessStrategy bstrategy);
 
 #endif   /* VACUUM_H */
index 94da564d1eb8a2e4a755423eb11df86e2dfc089c..a812a9e269ae3cf5152d79b7322ff3f82990da66 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/buf.h,v 1.21 2007/01/05 22:19:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf.h,v 1.22 2007/05/30 20:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,4 +36,11 @@ typedef int Buffer;
  */
 #define BufferIsLocal(buffer)  ((buffer) < 0)
 
+/*
+ * Buffer access strategy objects.
+ *
+ * BufferAccessStrategyData is private to freelist.c
+ */
+typedef struct BufferAccessStrategyData *BufferAccessStrategy;
+
 #endif   /* BUF_H */
index 561b7e40f694323ccd5b67d82ad2383eda69631e..d5eef8734ffbdcbfe1d4d70a6540b4830af87238 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.89 2007/01/05 22:19:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.90 2007/05/30 20:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -167,9 +167,6 @@ extern DLLIMPORT BufferDesc *BufferDescriptors;
 /* in localbuf.c */
 extern BufferDesc *LocalBufferDescriptors;
 
-/* in freelist.c */
-extern bool strategy_hint_vacuum;
-
 /* event counters in buf_init.c */
 extern long int ReadBufferCount;
 extern long int ReadLocalBufferCount;
@@ -184,8 +181,12 @@ extern long int LocalBufferFlushCount;
  */
 
 /* freelist.c */
-extern volatile BufferDesc *StrategyGetBuffer(void);
-extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head);
+extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
+                                                                                         bool *lock_held);
+extern void StrategyFreeBuffer(volatile BufferDesc *buf);
+extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
+                                                                volatile BufferDesc *buf);
+
 extern int     StrategySyncStart(void);
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
index ad20362179360784aeadc47fac9ea12bb5ce83af..9ae83b4253ebbd994369a0c5f6e0328d42e42000 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.103 2007/05/02 23:18:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.104 2007/05/30 20:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 typedef void *Block;
 
+/* Possible arguments for GetAccessStrategy() */
+typedef enum BufferAccessStrategyType
+{
+       BAS_NORMAL,             /* Normal random access */
+       BAS_BULKREAD,   /* Large read-only scan (hint bit updates are ok) */
+       BAS_VACUUM              /* VACUUM */
+} BufferAccessStrategyType;
+
 /* in globals.c ... this duplicates miscadmin.h */
 extern DLLIMPORT int NBuffers;
 
@@ -111,6 +119,8 @@ extern DLLIMPORT int32 *LocalRefCount;
  * prototypes for functions in bufmgr.c
  */
 extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
+extern Buffer ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
+                                                                        BufferAccessStrategy strategy);
 extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
@@ -157,6 +167,7 @@ extern void BgBufferSync(void);
 extern void AtProcExit_LocalBuffers(void);
 
 /* in freelist.c */
-extern void StrategyHintVacuum(bool vacuum_active);
+extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
+extern void FreeAccessStrategy(BufferAccessStrategy strategy);
 
 #endif
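
Putting the public pieces together, a hedged caller-level sketch of the intended lifecycle (the scan function and its loop body are illustrative; the buffer-manager calls are the API declared above):

	static void
	scan_relation_with_ring(Relation rel, BlockNumber nblocks)
	{
		BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
		BlockNumber blkno;

		for (blkno = 0; blkno < nblocks; blkno++)
		{
			Buffer		buf = ReadBufferWithStrategy(rel, blkno, strategy);

			/* ... inspect the page; the scan now recycles a ~256 kB ring ... */
			ReleaseBuffer(buf);
		}
		FreeAccessStrategy(strategy);
	}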