revert: Generalize relation analyze in table AM interface
authorAlexander Korotkov <akorotkov@postgresql.org>
Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)
committerAlexander Korotkov <akorotkov@postgresql.org>
Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)
This commit reverts 27bc1772fc and dd1f6b0c17.  Per review by Andres Freund.

Discussion: https://postgr.es/m/20240415201057.khoyxbwwxfgzomeo%40awork3.anarazel.de

src/backend/access/heap/heapam_handler.c
src/backend/access/table/tableamapi.c
src/backend/commands/analyze.c
src/include/access/heapam.h
src/include/access/tableam.h
src/include/commands/vacuum.h
src/include/foreign/fdwapi.h
src/tools/pgindent/typedefs.list

index 3428d80817cd620d43aed475fe6c156c33027578..6f8b1b79298f3a364b95b76f93bd7d3814f362dd 100644 (file)
@@ -1002,7 +1002,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
  * until heapam_scan_analyze_next_tuple() returns false.  That is until all the
  * items of the heap page are analyzed.
  */
-bool
+static bool
 heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
 {
    HeapScanDesc hscan = (HeapScanDesc) scan;
@@ -1026,17 +1026,7 @@ heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
    return true;
 }
 
-/*
- * Iterate over tuples in the block selected with
- * heapam_scan_analyze_next_block().  If a tuple that's suitable for sampling
- * is found, true is returned and a tuple is stored in `slot`.  When no more
- * tuples for sampling, false is returned and the pin and lock acquired by
- * heapam_scan_analyze_next_block() are released.
- *
- * *liverows and *deadrows are incremented according to the encountered
- * tuples.
- */
-bool
+static bool
 heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
                               double *liverows, double *deadrows,
                               TupleTableSlot *slot)
@@ -2593,18 +2583,6 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
    }
 }
 
-/*
- * heapap_analyze -- implementation of relation_analyze() for heap
- *                  table access method
- */
-static void
-heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
-              BlockNumber *totalpages, BufferAccessStrategy bstrategy)
-{
-   block_level_table_analyze(relation, func, totalpages, bstrategy,
-                             heapam_scan_analyze_next_block,
-                             heapam_scan_analyze_next_tuple);
-}
 
 /* ------------------------------------------------------------------------
  * Definition of the heap table access method.
@@ -2652,9 +2630,10 @@ static const TableAmRoutine heapam_methods = {
    .relation_copy_data = heapam_relation_copy_data,
    .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
    .relation_vacuum = heap_vacuum_rel,
+   .scan_analyze_next_block = heapam_scan_analyze_next_block,
+   .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
    .index_build_range_scan = heapam_index_build_range_scan,
    .index_validate_scan = heapam_index_validate_scan,
-   .relation_analyze = heapam_analyze,
 
    .relation_size = table_block_relation_size,
    .relation_needs_toast_table = heapam_relation_needs_toast_table,
index 55b8caeadf2c8b2d3b350d9661954e1f31852299..ce637a5a5d9931163bd1126b1826129e143aab07 100644 (file)
@@ -81,6 +81,8 @@ GetTableAmRoutine(Oid amhandler)
    Assert(routine->relation_copy_data != NULL);
    Assert(routine->relation_copy_for_cluster != NULL);
    Assert(routine->relation_vacuum != NULL);
+   Assert(routine->scan_analyze_next_block != NULL);
+   Assert(routine->scan_analyze_next_tuple != NULL);
    Assert(routine->index_build_range_scan != NULL);
    Assert(routine->index_validate_scan != NULL);
 
index 516b43b0e341d2f37b7338c72be43fd0ada5954b..7d2cd24997292a0865cee0d51b7762e98d96590a 100644 (file)
@@ -17,7 +17,6 @@
 #include <math.h>
 
 #include "access/detoast.h"
-#include "access/heapam.h"
 #include "access/genam.h"
 #include "access/multixact.h"
 #include "access/relation.h"
@@ -76,8 +75,6 @@ int           default_statistics_target = 100;
 /* A few variables that don't seem worth passing around as parameters */
 static MemoryContext anl_context = NULL;
 static BufferAccessStrategy vac_strategy;
-static ScanAnalyzeNextBlockFunc scan_analyze_next_block;
-static ScanAnalyzeNextTupleFunc scan_analyze_next_tuple;
 
 
 static void do_analyze_rel(Relation onerel,
@@ -90,6 +87,9 @@ static void compute_index_stats(Relation onerel, double totalrows,
                                MemoryContext col_context);
 static VacAttrStats *examine_attribute(Relation onerel, int attnum,
                                       Node *index_expr);
+static int acquire_sample_rows(Relation onerel, int elevel,
+                               HeapTuple *rows, int targrows,
+                               double *totalrows, double *totaldeadrows);
 static int compare_rows(const void *a, const void *b, void *arg);
 static int acquire_inherited_sample_rows(Relation onerel, int elevel,
                                          HeapTuple *rows, int targrows,
@@ -190,12 +190,10 @@ analyze_rel(Oid relid, RangeVar *relation,
    if (onerel->rd_rel->relkind == RELKIND_RELATION ||
        onerel->rd_rel->relkind == RELKIND_MATVIEW)
    {
-       /*
-        * Get row acquisition function, blocks and tuples iteration callbacks
-        * provided by table AM
-        */
-       table_relation_analyze(onerel, &acquirefunc,
-                              &relpages, vac_strategy);
+       /* Regular table, so we'll use the regular row acquisition function */
+       acquirefunc = acquire_sample_rows;
+       /* Also get regular table's size */
+       relpages = RelationGetNumberOfBlocks(onerel);
    }
    else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
    {
@@ -1119,17 +1117,15 @@ block_sampling_read_stream_next(ReadStream *stream,
 }
 
 /*
- * acquire_sample_rows -- acquire a random sample of rows from the
- * block-based relation
+ * acquire_sample_rows -- acquire a random sample of rows from the table
  *
  * Selected rows are returned in the caller-allocated array rows[], which
  * must have at least targrows entries.
  * The actual number of rows selected is returned as the function result.
- * We also estimate the total numbers of live and dead rows in the relation,
+ * We also estimate the total numbers of live and dead rows in the table,
  * and return them into *totalrows and *totaldeadrows, respectively.
  *
- * The returned list of tuples is in order by physical position in the
- * relation.
+ * The returned list of tuples is in order by physical position in the table.
  * (We will rely on this later to derive correlation estimates.)
  *
  * As of May 2004 we use a new two-stage method:  Stage one selects up
@@ -1151,7 +1147,7 @@ block_sampling_read_stream_next(ReadStream *stream,
  * look at a statistically unbiased set of blocks, we should get
  * unbiased estimates of the average numbers of live and dead rows per
  * block.  The previous sampling method put too much credence in the row
- * density near the start of the relation.
+ * density near the start of the table.
  */
 static int
 acquire_sample_rows(Relation onerel, int elevel,
@@ -1204,11 +1200,11 @@ acquire_sample_rows(Relation onerel, int elevel,
                                        0);
 
    /* Outer loop over blocks to sample */
-   while (scan_analyze_next_block(scan, stream))
+   while (table_scan_analyze_next_block(scan, stream))
    {
        vacuum_delay_point();
 
-       while (scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+       while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
        {
            /*
             * The first targrows sample rows are simply copied into the
@@ -1331,25 +1327,6 @@ compare_rows(const void *a, const void *b, void *arg)
    return 0;
 }
 
-/*
- * block_level_table_analyze -- implementation of relation_analyze() for
- *                             block-level table access methods
- */
-void
-block_level_table_analyze(Relation relation,
-                         AcquireSampleRowsFunc *func,
-                         BlockNumber *totalpages,
-                         BufferAccessStrategy bstrategy,
-                         ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
-                         ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb)
-{
-   *func = acquire_sample_rows;
-   *totalpages = RelationGetNumberOfBlocks(relation);
-   vac_strategy = bstrategy;
-   scan_analyze_next_block = scan_analyze_next_block_cb;
-   scan_analyze_next_tuple = scan_analyze_next_tuple_cb;
-}
-
 
 /*
  * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
@@ -1439,9 +1416,9 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
        if (childrel->rd_rel->relkind == RELKIND_RELATION ||
            childrel->rd_rel->relkind == RELKIND_MATVIEW)
        {
-           /* Use row acquisition function provided by table AM */
-           table_relation_analyze(childrel, &acquirefunc,
-                                  &relpages, vac_strategy);
+           /* Regular table, so use the regular row acquisition function */
+           acquirefunc = acquire_sample_rows;
+           relpages = RelationGetNumberOfBlocks(childrel);
        }
        else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
        {
index 735662dc9df67e05e113f29130ef9fc75f0b3dd8..c47a5045cef1396f85476f58d69255ce23233b87 100644 (file)
@@ -409,14 +409,6 @@ extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
 extern bool HeapTupleIsSurelyDead(HeapTuple htup,
                                  struct GlobalVisState *vistest);
 
-/* in heap/heapam_handler.c*/
-extern bool heapam_scan_analyze_next_block(TableScanDesc scan,
-                                          ReadStream *stream);
-extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
-                                          TransactionId OldestXmin,
-                                          double *liverows, double *deadrows,
-                                          TupleTableSlot *slot);
-
 /*
  * To avoid leaking too much knowledge about reorderbuffer implementation
  * details this is implemented in reorderbuffer.c not heapam_visibility.c
index 1cc395317e52626ef122b3c48621788fe0407664..8e583b45cd54791f2324606ca280871957fafa09 100644 (file)
@@ -20,8 +20,8 @@
 #include "access/relscan.h"
 #include "access/sdir.h"
 #include "access/xact.h"
-#include "commands/vacuum.h"
 #include "executor/tuptable.h"
+#include "storage/read_stream.h"
 #include "utils/rel.h"
 #include "utils/snapshot.h"
 
@@ -655,6 +655,50 @@ typedef struct TableAmRoutine
                                    struct VacuumParams *params,
                                    BufferAccessStrategy bstrategy);
 
+   /*
+    * Prepare to analyze the next block in the read stream.  Returns false if
+    * the stream is exhausted and true otherwise. The scan must have been
+    * started with SO_TYPE_ANALYZE option.
+    *
+    * This routine holds a buffer pin and lock on the heap page.  They are
+    * held until heapam_scan_analyze_next_tuple() returns false.  That is
+    * until all the items of the heap page are analyzed.
+    */
+
+   /*
+    * Prepare to analyze block `blockno` of `scan`. The scan has been started
+    * with table_beginscan_analyze().  See also
+    * table_scan_analyze_next_block().
+    *
+    * The callback may acquire resources like locks that are held until
+    * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
+    * to hold a lock until all tuples on a block have been analyzed by
+    * scan_analyze_next_tuple.
+    *
+    * The callback can return false if the block is not suitable for
+    * sampling, e.g. because it's a metapage that could never contain tuples.
+    *
+    * XXX: This obviously is primarily suited for block-based AMs. It's not
+    * clear what a good interface for non block based AMs would be, so there
+    * isn't one yet.
+    */
+   bool        (*scan_analyze_next_block) (TableScanDesc scan,
+                                           ReadStream *stream);
+
+   /*
+    * See table_scan_analyze_next_tuple().
+    *
+    * Not every AM might have a meaningful concept of dead rows, in which
+    * case it's OK to not increment *deadrows - but note that that may
+    * influence autovacuum scheduling (see comment for relation_vacuum
+    * callback).
+    */
+   bool        (*scan_analyze_next_tuple) (TableScanDesc scan,
+                                           TransactionId OldestXmin,
+                                           double *liverows,
+                                           double *deadrows,
+                                           TupleTableSlot *slot);
+
    /* see table_index_build_range_scan for reference about parameters */
    double      (*index_build_range_scan) (Relation table_rel,
                                           Relation index_rel,
@@ -675,12 +719,6 @@ typedef struct TableAmRoutine
                                        Snapshot snapshot,
                                        struct ValidateIndexState *state);
 
-   /* See table_relation_analyze() */
-   void        (*relation_analyze) (Relation relation,
-                                    AcquireSampleRowsFunc *func,
-                                    BlockNumber *totalpages,
-                                    BufferAccessStrategy bstrategy);
-
 
    /* ------------------------------------------------------------------------
     * Miscellaneous functions.
@@ -1682,6 +1720,40 @@ table_relation_vacuum(Relation rel, struct VacuumParams *params,
    rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
 }
 
+/*
+ * Prepare to analyze the next block in the read stream. The scan needs to
+ * have been  started with table_beginscan_analyze().  Note that this routine
+ * might acquire resources like locks that are held until
+ * table_scan_analyze_next_tuple() returns false.
+ *
+ * Returns false if block is unsuitable for sampling, true otherwise.
+ */
+static inline bool
+table_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
+{
+   return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, stream);
+}
+
+/*
+ * Iterate over tuples in the block selected with
+ * table_scan_analyze_next_block() (which needs to have returned true, and
+ * this routine may not have returned false for the same block before). If a
+ * tuple that's suitable for sampling is found, true is returned and a tuple
+ * is stored in `slot`.
+ *
+ * *liverows and *deadrows are incremented according to the encountered
+ * tuples.
+ */
+static inline bool
+table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
+                             double *liverows, double *deadrows,
+                             TupleTableSlot *slot)
+{
+   return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
+                                                           liverows, deadrows,
+                                                           slot);
+}
+
 /*
  * table_index_build_scan - scan the table to find tuples to be indexed
  *
@@ -1787,21 +1859,6 @@ table_index_validate_scan(Relation table_rel,
                                               state);
 }
 
-/*
- * table_relation_analyze - fill the infromation for a sampling statistics
- *                         acquisition
- *
- * The pointer to a function that will collect sample rows from the table
- * should be stored to `*func`, plus the estimated size of the table in pages
- * should br stored to `*totalpages`.
- */
-static inline void
-table_relation_analyze(Relation relation, AcquireSampleRowsFunc *func,
-                      BlockNumber *totalpages, BufferAccessStrategy bstrategy)
-{
-   relation->rd_tableam->relation_analyze(relation, func,
-                                          totalpages, bstrategy);
-}
 
 /* ----------------------------------------------------------------------------
  * Miscellaneous functionality
index 12a03abb75a56ec9a442666c907bf3999e5705f7..759f9a87d38e559c5f718b65e1c0f89dbbc16693 100644 (file)
 #include "catalog/pg_class.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
-#include "executor/tuptable.h"
 #include "parser/parse_node.h"
 #include "storage/buf.h"
 #include "storage/lock.h"
-#include "storage/read_stream.h"
 #include "utils/relcache.h"
 
 /*
@@ -178,21 +176,6 @@ typedef struct VacAttrStats
    int         rowstride;
 } VacAttrStats;
 
-/*
- * AcquireSampleRowsFunc - a function for the sampling statistics collection.
- *
- * A random sample of up to `targrows` rows should be collected from the
- * table and stored into the caller-provided `rows` array.  The actual number
- * of rows collected must be returned.  In addition, a function should store
- * estimates of the total numbers of live and dead rows in the table into the
- * output parameters `*totalrows` and `*totaldeadrows1.  (Set `*totaldeadrows`
- * to zero if the storage does not have any concept of dead rows.)
- */
-typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
-                                     HeapTuple *rows, int targrows,
-                                     double *totalrows,
-                                     double *totaldeadrows);
-
 /* flag bits for VacuumParams->options */
 #define VACOPT_VACUUM 0x01     /* do VACUUM */
 #define VACOPT_ANALYZE 0x02        /* do ANALYZE */
@@ -392,61 +375,9 @@ extern void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs,
 extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
 
 /* in commands/analyze.c */
-
-struct TableScanDescData;
-
-
-/*
- * A callback to prepare to analyze block from `stream` of `scan`. The scan
- * has been started with table_beginscan_analyze().
- *
- * The callback may acquire resources like locks that are held until
- * ScanAnalyzeNextTupleFunc returns false. In some cases it could be
- * useful to hold a lock until all tuples in a block have been analyzed by
- * ScanAnalyzeNextTupleFunc.
- *
- * The callback can return false if the block is not suitable for
- * sampling, e.g. because it's a metapage that could never contain tuples.
- *
- * This is primarily suited for block-based AMs. It's not clear what a
- * good interface for non block-based AMs would be, so there isn't one
- * yet and sampling using a custom implementation of acquire_sample_rows
- * may be preferred.
- */
-typedef bool (*ScanAnalyzeNextBlockFunc) (struct TableScanDescData *scan,
-                                         ReadStream *stream);
-
-/*
- * A callback to iterate over tuples in the block selected with
- * ScanAnalyzeNextBlockFunc (which needs to have returned true, and
- * this routine may not have returned false for the same block before). If
- * a tuple that's suitable for sampling is found, true is returned and a
- * tuple is stored in `slot`.
- *
- * *liverows and *deadrows are incremented according to the encountered
- * tuples.
- *
- * Not every AM might have a meaningful concept of dead rows, in which
- * case it's OK to not increment *deadrows - but note that that may
- * influence autovacuum scheduling (see comment for relation_vacuum
- * callback).
- */
-typedef bool (*ScanAnalyzeNextTupleFunc) (struct TableScanDescData *scan,
-                                         TransactionId OldestXmin,
-                                         double *liverows,
-                                         double *deadrows,
-                                         TupleTableSlot *slot);
-
 extern void analyze_rel(Oid relid, RangeVar *relation,
                        VacuumParams *params, List *va_cols, bool in_outer_xact,
                        BufferAccessStrategy bstrategy);
-extern void block_level_table_analyze(Relation relation,
-                                     AcquireSampleRowsFunc *func,
-                                     BlockNumber *totalpages,
-                                     BufferAccessStrategy bstrategy,
-                                     ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
-                                     ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb);
-
 extern bool std_typanalyze(VacAttrStats *stats);
 
 /* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */
index 0968e0a01ec6efa1cfbed26726da1915df970f33..7f0475d2fa76bca69514a2a7856dbdfdc6cc7b77 100644 (file)
@@ -13,7 +13,6 @@
 #define FDWAPI_H
 
 #include "access/parallel.h"
-#include "commands/vacuum.h"
 #include "nodes/execnodes.h"
 #include "nodes/pathnodes.h"
 
@@ -149,8 +148,13 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate,
 typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
                                              struct ExplainState *es);
 
+typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
+                                     HeapTuple *rows, int targrows,
+                                     double *totalrows,
+                                     double *totaldeadrows);
+
 typedef bool (*AnalyzeForeignTable_function) (Relation relation,
-                                             AcquireSampleRowsFunc *func,
+                                             AcquireSampleRowsFunc * func,
                                              BlockNumber *totalpages);
 
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
index cf05701c032fee4135bd0a5b73abc968326caf11..eae5da8b6ed9936c94834f475804975871fa4155 100644 (file)
@@ -22,7 +22,6 @@ AclItem
 AclMaskHow
 AclMode
 AclResult
-AcquireSampleRowsFunc
 ActionList
 ActiveSnapshotElt
 AddForeignUpdateTargets_function
@@ -2530,8 +2529,6 @@ ScalarIOData
 ScalarItem
 ScalarMCVItem
 Scan
-ScanAnalyzeNextBlockFunc
-ScanAnalyzeNextTupleFunc
 ScanDirection
 ScanKey
 ScanKeyData