Use streaming I/O in ANALYZE.
author: Thomas Munro <tmunro@postgresql.org>
Mon, 8 Apr 2024 01:16:20 +0000 (13:16 +1200)
committer: Thomas Munro <tmunro@postgresql.org>
Mon, 8 Apr 2024 01:16:28 +0000 (13:16 +1200)
The ANALYZE command prefetches and reads sample blocks chosen by a
BlockSampler algorithm. Instead of calling [Prefetch|Read]Buffer() for
each block, ANALYZE now uses the streaming API introduced in b5a9b18cd0.

Author: Nazir Bilal Yavuz <byavuz81@gmail.com>
Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://postgr.es/m/flat/CAN55FZ0UhXqk9v3y-zW_fp4-WCp43V8y0A72xPmLkOM%2B6M%2BmJg%40mail.gmail.com

src/backend/access/heap/heapam_handler.c
src/backend/commands/analyze.c
src/include/access/heapam.h

index 58de2c82a707ab99579fab844d89684d7c1e0b3c..cc4d51d5514646f0e2eee9177e27d4fe46f7af98 100644 (file)
@@ -1055,33 +1055,36 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 }
 
 /*
- * Prepare to analyze block `blockno` of `scan`.  The scan has been started
+ * Prepare to analyze the next block in the read stream.  Returns false if
+ * the stream is exhausted and true otherwise. The scan must have been started
  * with SO_TYPE_ANALYZE option.
  *
  * This routine holds a buffer pin and lock on the heap page.  They are held
  * until heapam_scan_analyze_next_tuple() returns false.  That is until all the
  * items of the heap page are analyzed.
  */
-void
-heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
-                              BufferAccessStrategy bstrategy)
+bool
+heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
 {
    HeapScanDesc hscan = (HeapScanDesc) scan;
 
    /*
     * We must maintain a pin on the target page's buffer to ensure that
     * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
-    * under us.  Hence, pin the page until we are done looking at it.  We
-    * also choose to hold sharelock on the buffer throughout --- we could
-    * release and re-acquire sharelock for each tuple, but since we aren't
-    * doing much work per tuple, the extra lock traffic is probably better
-    * avoided.
+    * under us.  It comes from the stream already pinned.  We also choose to
+    * hold sharelock on the buffer throughout --- we could release and
+    * re-acquire sharelock for each tuple, but since we aren't doing much
+    * work per tuple, the extra lock traffic is probably better avoided.
     */
-   hscan->rs_cblock = blockno;
-   hscan->rs_cindex = FirstOffsetNumber;
-   hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
-                                       blockno, RBM_NORMAL, bstrategy);
+   hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
+   if (!BufferIsValid(hscan->rs_cbuf))
+       return false;
+
    LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+   hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
+   hscan->rs_cindex = FirstOffsetNumber;
+   return true;
 }
 
 /*
index 2fb39f3ede16c667f0be93b0c137004edd892fb8..da27a13a3f0cc9e8d8f57629fb1e9177405ba311 100644 (file)
@@ -1102,6 +1102,20 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
    return stats;
 }
 
+/*
+ * Read stream callback: returns the next block chosen by the BlockSampler in
+ * callback_private_data, or InvalidBlockNumber once it has no more blocks.
+ */
+static BlockNumber
+block_sampling_read_stream_next(ReadStream *stream,
+                               void *callback_private_data,
+                               void *per_buffer_data)
+{
+   BlockSamplerData *bs = callback_private_data;
+
+   return BlockSampler_HasMore(bs) ? BlockSampler_Next(bs) : InvalidBlockNumber;
+}
+
 /*
  * acquire_sample_rows -- acquire a random sample of rows from the heap
  *
@@ -1154,10 +1168,7 @@ acquire_sample_rows(Relation onerel, int elevel,
    TableScanDesc scan;
    BlockNumber nblocks;
    BlockNumber blksdone = 0;
-#ifdef USE_PREFETCH
-   int         prefetch_maximum = 0;   /* blocks to prefetch if enabled */
-   BlockSamplerData prefetch_bs;
-#endif
+   ReadStream *stream;
 
    Assert(targrows > 0);
 
@@ -1170,13 +1181,6 @@ acquire_sample_rows(Relation onerel, int elevel,
    randseed = pg_prng_uint32(&pg_global_prng_state);
    nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed);
 
-#ifdef USE_PREFETCH
-   prefetch_maximum = get_tablespace_maintenance_io_concurrency(onerel->rd_rel->reltablespace);
-   /* Create another BlockSampler, using the same seed, for prefetching */
-   if (prefetch_maximum)
-       (void) BlockSampler_Init(&prefetch_bs, totalblocks, targrows, randseed);
-#endif
-
    /* Report sampling block numbers */
    pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL,
                                 nblocks);
@@ -1187,60 +1191,19 @@ acquire_sample_rows(Relation onerel, int elevel,
    scan = heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
    slot = table_slot_create(onerel, NULL);
 
-#ifdef USE_PREFETCH
-
-   /*
-    * If we are doing prefetching, then go ahead and tell the kernel about
-    * the first set of pages we are going to want.  This also moves our
-    * iterator out ahead of the main one being used, where we will keep it so
-    * that we're always pre-fetching out prefetch_maximum number of blocks
-    * ahead.
-    */
-   if (prefetch_maximum)
-   {
-       for (int i = 0; i < prefetch_maximum; i++)
-       {
-           BlockNumber prefetch_block;
-
-           if (!BlockSampler_HasMore(&prefetch_bs))
-               break;
-
-           prefetch_block = BlockSampler_Next(&prefetch_bs);
-           PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_block);
-       }
-   }
-#endif
+   stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
+                                       vac_strategy,
+                                       scan->rs_rd,
+                                       MAIN_FORKNUM,
+                                       block_sampling_read_stream_next,
+                                       &bs,
+                                       0);
 
    /* Outer loop over blocks to sample */
-   while (BlockSampler_HasMore(&bs))
+   while (heapam_scan_analyze_next_block(scan, stream))
    {
-       BlockNumber targblock = BlockSampler_Next(&bs);
-#ifdef USE_PREFETCH
-       BlockNumber prefetch_targblock = InvalidBlockNumber;
-
-       /*
-        * Make sure that every time the main BlockSampler is moved forward
-        * that our prefetch BlockSampler also gets moved forward, so that we
-        * always stay out ahead.
-        */
-       if (prefetch_maximum && BlockSampler_HasMore(&prefetch_bs))
-           prefetch_targblock = BlockSampler_Next(&prefetch_bs);
-#endif
-
        vacuum_delay_point();
 
-       heapam_scan_analyze_next_block(scan, targblock, vac_strategy);
-
-#ifdef USE_PREFETCH
-
-       /*
-        * When pre-fetching, after we get a block, tell the kernel about the
-        * next one we will want, if there's any left.
-        */
-       if (prefetch_maximum && prefetch_targblock != InvalidBlockNumber)
-           PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_targblock);
-#endif
-
        while (heapam_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
        {
            /*
@@ -1290,6 +1253,8 @@ acquire_sample_rows(Relation onerel, int elevel,
                                     ++blksdone);
    }
 
+   read_stream_end(stream);
+
    ExecDropSingleTupleTableSlot(slot);
    heap_endscan(scan);
 
index 48936826bcc2e62caab043bd81bb4616694a2994..f84dbe629fe234f24030d6a8f3fc54f135110f4d 100644 (file)
@@ -413,9 +413,8 @@ extern bool HeapTupleIsSurelyDead(HeapTuple htup,
                                  struct GlobalVisState *vistest);
 
 /* in heap/heapam_handler.c*/
-extern void heapam_scan_analyze_next_block(TableScanDesc scan,
-                                          BlockNumber blockno,
-                                          BufferAccessStrategy bstrategy);
+extern bool heapam_scan_analyze_next_block(TableScanDesc scan,
+                                          ReadStream *stream);
 extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
                                           TransactionId OldestXmin,
                                           double *liverows, double *deadrows,