revert: Generalize relation analyze in table AM interface
authorAlexander Korotkov <akorotkov@postgresql.org>
Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)
committerAlexander Korotkov <akorotkov@postgresql.org>
Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)
This commit reverts 27bc1772fc and dd1f6b0c17.  Per review by Andres Freund.

Discussion: https://postgr.es/m/20240415201057.khoyxbwwxfgzomeo%40awork3.anarazel.de

src/backend/access/heap/heapam_handler.c
src/backend/access/table/tableamapi.c
src/backend/commands/analyze.c
src/include/access/heapam.h
src/include/access/tableam.h
src/include/commands/vacuum.h
src/include/foreign/fdwapi.h
src/tools/pgindent/typedefs.list

index 3428d80817cd620d43aed475fe6c156c33027578..6f8b1b79298f3a364b95b76f93bd7d3814f362dd 100644 (file)
@@ -1002,7 +1002,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
  * until heapam_scan_analyze_next_tuple() returns false.  That is until all the
  * items of the heap page are analyzed.
  */
-bool
+static bool
 heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
 {
        HeapScanDesc hscan = (HeapScanDesc) scan;
@@ -1026,17 +1026,7 @@ heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
        return true;
 }
 
-/*
- * Iterate over tuples in the block selected with
- * heapam_scan_analyze_next_block().  If a tuple that's suitable for sampling
- * is found, true is returned and a tuple is stored in `slot`.  When no more
- * tuples for sampling, false is returned and the pin and lock acquired by
- * heapam_scan_analyze_next_block() are released.
- *
- * *liverows and *deadrows are incremented according to the encountered
- * tuples.
- */
-bool
+static bool
 heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
                                                           double *liverows, double *deadrows,
                                                           TupleTableSlot *slot)
@@ -2593,18 +2583,6 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
        }
 }
 
-/*
- * heapap_analyze -- implementation of relation_analyze() for heap
- *                                      table access method
- */
-static void
-heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
-                          BlockNumber *totalpages, BufferAccessStrategy bstrategy)
-{
-       block_level_table_analyze(relation, func, totalpages, bstrategy,
-                                                         heapam_scan_analyze_next_block,
-                                                         heapam_scan_analyze_next_tuple);
-}
 
 /* ------------------------------------------------------------------------
  * Definition of the heap table access method.
@@ -2652,9 +2630,10 @@ static const TableAmRoutine heapam_methods = {
        .relation_copy_data = heapam_relation_copy_data,
        .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
        .relation_vacuum = heap_vacuum_rel,
+       .scan_analyze_next_block = heapam_scan_analyze_next_block,
+       .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
        .index_build_range_scan = heapam_index_build_range_scan,
        .index_validate_scan = heapam_index_validate_scan,
-       .relation_analyze = heapam_analyze,
 
        .relation_size = table_block_relation_size,
        .relation_needs_toast_table = heapam_relation_needs_toast_table,
index 55b8caeadf2c8b2d3b350d9661954e1f31852299..ce637a5a5d9931163bd1126b1826129e143aab07 100644 (file)
@@ -81,6 +81,8 @@ GetTableAmRoutine(Oid amhandler)
        Assert(routine->relation_copy_data != NULL);
        Assert(routine->relation_copy_for_cluster != NULL);
        Assert(routine->relation_vacuum != NULL);
+       Assert(routine->scan_analyze_next_block != NULL);
+       Assert(routine->scan_analyze_next_tuple != NULL);
        Assert(routine->index_build_range_scan != NULL);
        Assert(routine->index_validate_scan != NULL);
 
index 516b43b0e341d2f37b7338c72be43fd0ada5954b..7d2cd24997292a0865cee0d51b7762e98d96590a 100644 (file)
@@ -17,7 +17,6 @@
 #include <math.h>
 
 #include "access/detoast.h"
-#include "access/heapam.h"
 #include "access/genam.h"
 #include "access/multixact.h"
 #include "access/relation.h"
@@ -76,8 +75,6 @@ int                   default_statistics_target = 100;
 /* A few variables that don't seem worth passing around as parameters */
 static MemoryContext anl_context = NULL;
 static BufferAccessStrategy vac_strategy;
-static ScanAnalyzeNextBlockFunc scan_analyze_next_block;
-static ScanAnalyzeNextTupleFunc scan_analyze_next_tuple;
 
 
 static void do_analyze_rel(Relation onerel,
@@ -90,6 +87,9 @@ static void compute_index_stats(Relation onerel, double totalrows,
                                                                MemoryContext col_context);
 static VacAttrStats *examine_attribute(Relation onerel, int attnum,
                                                                           Node *index_expr);
+static int     acquire_sample_rows(Relation onerel, int elevel,
+                                                               HeapTuple *rows, int targrows,
+                                                               double *totalrows, double *totaldeadrows);
 static int     compare_rows(const void *a, const void *b, void *arg);
 static int     acquire_inherited_sample_rows(Relation onerel, int elevel,
                                                                                  HeapTuple *rows, int targrows,
@@ -190,12 +190,10 @@ analyze_rel(Oid relid, RangeVar *relation,
        if (onerel->rd_rel->relkind == RELKIND_RELATION ||
                onerel->rd_rel->relkind == RELKIND_MATVIEW)
        {
-               /*
-                * Get row acquisition function, blocks and tuples iteration callbacks
-                * provided by table AM
-                */
-               table_relation_analyze(onerel, &acquirefunc,
-                                                          &relpages, vac_strategy);
+               /* Regular table, so we'll use the regular row acquisition function */
+               acquirefunc = acquire_sample_rows;
+               /* Also get regular table's size */
+               relpages = RelationGetNumberOfBlocks(onerel);
        }
        else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
        {
@@ -1119,17 +1117,15 @@ block_sampling_read_stream_next(ReadStream *stream,
 }
 
 /*
- * acquire_sample_rows -- acquire a random sample of rows from the
- * block-based relation
+ * acquire_sample_rows -- acquire a random sample of rows from the table
  *
  * Selected rows are returned in the caller-allocated array rows[], which
  * must have at least targrows entries.
  * The actual number of rows selected is returned as the function result.
- * We also estimate the total numbers of live and dead rows in the relation,
+ * We also estimate the total numbers of live and dead rows in the table,
  * and return them into *totalrows and *totaldeadrows, respectively.
  *
- * The returned list of tuples is in order by physical position in the
- * relation.
+ * The returned list of tuples is in order by physical position in the table.
  * (We will rely on this later to derive correlation estimates.)
  *
  * As of May 2004 we use a new two-stage method:  Stage one selects up
@@ -1151,7 +1147,7 @@ block_sampling_read_stream_next(ReadStream *stream,
  * look at a statistically unbiased set of blocks, we should get
  * unbiased estimates of the average numbers of live and dead rows per
  * block.  The previous sampling method put too much credence in the row
- * density near the start of the relation.
+ * density near the start of the table.
  */
 static int
 acquire_sample_rows(Relation onerel, int elevel,
@@ -1204,11 +1200,11 @@ acquire_sample_rows(Relation onerel, int elevel,
                                                                                0);
 
        /* Outer loop over blocks to sample */
-       while (scan_analyze_next_block(scan, stream))
+       while (table_scan_analyze_next_block(scan, stream))
        {
                vacuum_delay_point();
 
-               while (scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+               while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
                {
                        /*
                         * The first targrows sample rows are simply copied into the
@@ -1331,25 +1327,6 @@ compare_rows(const void *a, const void *b, void *arg)
        return 0;
 }
 
-/*
- * block_level_table_analyze -- implementation of relation_analyze() for
- *                                                             block-level table access methods
- */
-void
-block_level_table_analyze(Relation relation,
-                                                 AcquireSampleRowsFunc *func,
-                                                 BlockNumber *totalpages,
-                                                 BufferAccessStrategy bstrategy,
-                                                 ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
-                                                 ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb)
-{
-       *func = acquire_sample_rows;
-       *totalpages = RelationGetNumberOfBlocks(relation);
-       vac_strategy = bstrategy;
-       scan_analyze_next_block = scan_analyze_next_block_cb;
-       scan_analyze_next_tuple = scan_analyze_next_tuple_cb;
-}
-
 
 /*
  * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
@@ -1439,9 +1416,9 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
                if (childrel->rd_rel->relkind == RELKIND_RELATION ||
                        childrel->rd_rel->relkind == RELKIND_MATVIEW)
                {
-                       /* Use row acquisition function provided by table AM */
-                       table_relation_analyze(childrel, &acquirefunc,
-                                                                  &relpages, vac_strategy);
+                       /* Regular table, so use the regular row acquisition function */
+                       acquirefunc = acquire_sample_rows;
+                       relpages = RelationGetNumberOfBlocks(childrel);
                }
                else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
                {
index 735662dc9df67e05e113f29130ef9fc75f0b3dd8..c47a5045cef1396f85476f58d69255ce23233b87 100644 (file)
@@ -409,14 +409,6 @@ extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
 extern bool HeapTupleIsSurelyDead(HeapTuple htup,
                                                                  struct GlobalVisState *vistest);
 
-/* in heap/heapam_handler.c*/
-extern bool heapam_scan_analyze_next_block(TableScanDesc scan,
-                                                                                  ReadStream *stream);
-extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
-                                                                                  TransactionId OldestXmin,
-                                                                                  double *liverows, double *deadrows,
-                                                                                  TupleTableSlot *slot);
-
 /*
  * To avoid leaking too much knowledge about reorderbuffer implementation
  * details this is implemented in reorderbuffer.c not heapam_visibility.c
index 1cc395317e52626ef122b3c48621788fe0407664..8e583b45cd54791f2324606ca280871957fafa09 100644 (file)
@@ -20,8 +20,8 @@
 #include "access/relscan.h"
 #include "access/sdir.h"
 #include "access/xact.h"
-#include "commands/vacuum.h"
 #include "executor/tuptable.h"
+#include "storage/read_stream.h"
 #include "utils/rel.h"
 #include "utils/snapshot.h"
 
@@ -655,6 +655,50 @@ typedef struct TableAmRoutine
                                                                        struct VacuumParams *params,
                                                                        BufferAccessStrategy bstrategy);
 
+       /*
+        * Prepare to analyze the next block in the read stream.  Returns false if
+        * the stream is exhausted and true otherwise. The scan must have been
+        * started with SO_TYPE_ANALYZE option.
+        *
+        * This routine holds a buffer pin and lock on the heap page.  They are
+        * held until heapam_scan_analyze_next_tuple() returns false.  That is
+        * until all the items of the heap page are analyzed.
+        */
+
+       /*
+        * Prepare to analyze block `blockno` of `scan`. The scan has been started
+        * with table_beginscan_analyze().  See also
+        * table_scan_analyze_next_block().
+        *
+        * The callback may acquire resources like locks that are held until
+        * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
+        * to hold a lock until all tuples on a block have been analyzed by
+        * scan_analyze_next_tuple.
+        *
+        * The callback can return false if the block is not suitable for
+        * sampling, e.g. because it's a metapage that could never contain tuples.
+        *
+        * XXX: This obviously is primarily suited for block-based AMs. It's not
+        * clear what a good interface for non block based AMs would be, so there
+        * isn't one yet.
+        */
+       bool            (*scan_analyze_next_block) (TableScanDesc scan,
+                                                                                       ReadStream *stream);
+
+       /*
+        * See table_scan_analyze_next_tuple().
+        *
+        * Not every AM might have a meaningful concept of dead rows, in which
+        * case it's OK to not increment *deadrows - but note that that may
+        * influence autovacuum scheduling (see comment for relation_vacuum
+        * callback).
+        */
+       bool            (*scan_analyze_next_tuple) (TableScanDesc scan,
+                                                                                       TransactionId OldestXmin,
+                                                                                       double *liverows,
+                                                                                       double *deadrows,
+                                                                                       TupleTableSlot *slot);
+
        /* see table_index_build_range_scan for reference about parameters */
        double          (*index_build_range_scan) (Relation table_rel,
                                                                                   Relation index_rel,
@@ -675,12 +719,6 @@ typedef struct TableAmRoutine
                                                                                Snapshot snapshot,
                                                                                struct ValidateIndexState *state);
 
-       /* See table_relation_analyze() */
-       void            (*relation_analyze) (Relation relation,
-                                                                        AcquireSampleRowsFunc *func,
-                                                                        BlockNumber *totalpages,
-                                                                        BufferAccessStrategy bstrategy);
-
 
        /* ------------------------------------------------------------------------
         * Miscellaneous functions.
@@ -1682,6 +1720,40 @@ table_relation_vacuum(Relation rel, struct VacuumParams *params,
        rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
 }
 
+/*
+ * Prepare to analyze the next block in the read stream. The scan needs to
+ * have been  started with table_beginscan_analyze().  Note that this routine
+ * might acquire resources like locks that are held until
+ * table_scan_analyze_next_tuple() returns false.
+ *
+ * Returns false if block is unsuitable for sampling, true otherwise.
+ */
+static inline bool
+table_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
+{
+       return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, stream);
+}
+
+/*
+ * Iterate over tuples in the block selected with
+ * table_scan_analyze_next_block() (which needs to have returned true, and
+ * this routine may not have returned false for the same block before). If a
+ * tuple that's suitable for sampling is found, true is returned and a tuple
+ * is stored in `slot`.
+ *
+ * *liverows and *deadrows are incremented according to the encountered
+ * tuples.
+ */
+static inline bool
+table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
+                                                         double *liverows, double *deadrows,
+                                                         TupleTableSlot *slot)
+{
+       return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
+                                                                                                                       liverows, deadrows,
+                                                                                                                       slot);
+}
+
 /*
  * table_index_build_scan - scan the table to find tuples to be indexed
  *
@@ -1787,21 +1859,6 @@ table_index_validate_scan(Relation table_rel,
                                                                                           state);
 }
 
-/*
- * table_relation_analyze - fill the infromation for a sampling statistics
- *                                                     acquisition
- *
- * The pointer to a function that will collect sample rows from the table
- * should be stored to `*func`, plus the estimated size of the table in pages
- * should br stored to `*totalpages`.
- */
-static inline void
-table_relation_analyze(Relation relation, AcquireSampleRowsFunc *func,
-                                          BlockNumber *totalpages, BufferAccessStrategy bstrategy)
-{
-       relation->rd_tableam->relation_analyze(relation, func,
-                                                                                  totalpages, bstrategy);
-}
 
 /* ----------------------------------------------------------------------------
  * Miscellaneous functionality
index 12a03abb75a56ec9a442666c907bf3999e5705f7..759f9a87d38e559c5f718b65e1c0f89dbbc16693 100644 (file)
 #include "catalog/pg_class.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
-#include "executor/tuptable.h"
 #include "parser/parse_node.h"
 #include "storage/buf.h"
 #include "storage/lock.h"
-#include "storage/read_stream.h"
 #include "utils/relcache.h"
 
 /*
@@ -178,21 +176,6 @@ typedef struct VacAttrStats
        int                     rowstride;
 } VacAttrStats;
 
-/*
- * AcquireSampleRowsFunc - a function for the sampling statistics collection.
- *
- * A random sample of up to `targrows` rows should be collected from the
- * table and stored into the caller-provided `rows` array.  The actual number
- * of rows collected must be returned.  In addition, a function should store
- * estimates of the total numbers of live and dead rows in the table into the
- * output parameters `*totalrows` and `*totaldeadrows1.  (Set `*totaldeadrows`
- * to zero if the storage does not have any concept of dead rows.)
- */
-typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
-                                                                         HeapTuple *rows, int targrows,
-                                                                         double *totalrows,
-                                                                         double *totaldeadrows);
-
 /* flag bits for VacuumParams->options */
 #define VACOPT_VACUUM 0x01             /* do VACUUM */
 #define VACOPT_ANALYZE 0x02            /* do ANALYZE */
@@ -392,61 +375,9 @@ extern void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs,
 extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
 
 /* in commands/analyze.c */
-
-struct TableScanDescData;
-
-
-/*
- * A callback to prepare to analyze block from `stream` of `scan`. The scan
- * has been started with table_beginscan_analyze().
- *
- * The callback may acquire resources like locks that are held until
- * ScanAnalyzeNextTupleFunc returns false. In some cases it could be
- * useful to hold a lock until all tuples in a block have been analyzed by
- * ScanAnalyzeNextTupleFunc.
- *
- * The callback can return false if the block is not suitable for
- * sampling, e.g. because it's a metapage that could never contain tuples.
- *
- * This is primarily suited for block-based AMs. It's not clear what a
- * good interface for non block-based AMs would be, so there isn't one
- * yet and sampling using a custom implementation of acquire_sample_rows
- * may be preferred.
- */
-typedef bool (*ScanAnalyzeNextBlockFunc) (struct TableScanDescData *scan,
-                                                                                 ReadStream *stream);
-
-/*
- * A callback to iterate over tuples in the block selected with
- * ScanAnalyzeNextBlockFunc (which needs to have returned true, and
- * this routine may not have returned false for the same block before). If
- * a tuple that's suitable for sampling is found, true is returned and a
- * tuple is stored in `slot`.
- *
- * *liverows and *deadrows are incremented according to the encountered
- * tuples.
- *
- * Not every AM might have a meaningful concept of dead rows, in which
- * case it's OK to not increment *deadrows - but note that that may
- * influence autovacuum scheduling (see comment for relation_vacuum
- * callback).
- */
-typedef bool (*ScanAnalyzeNextTupleFunc) (struct TableScanDescData *scan,
-                                                                                 TransactionId OldestXmin,
-                                                                                 double *liverows,
-                                                                                 double *deadrows,
-                                                                                 TupleTableSlot *slot);
-
 extern void analyze_rel(Oid relid, RangeVar *relation,
                                                VacuumParams *params, List *va_cols, bool in_outer_xact,
                                                BufferAccessStrategy bstrategy);
-extern void block_level_table_analyze(Relation relation,
-                                                                         AcquireSampleRowsFunc *func,
-                                                                         BlockNumber *totalpages,
-                                                                         BufferAccessStrategy bstrategy,
-                                                                         ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
-                                                                         ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb);
-
 extern bool std_typanalyze(VacAttrStats *stats);
 
 /* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */
index 0968e0a01ec6efa1cfbed26726da1915df970f33..7f0475d2fa76bca69514a2a7856dbdfdc6cc7b77 100644 (file)
@@ -13,7 +13,6 @@
 #define FDWAPI_H
 
 #include "access/parallel.h"
-#include "commands/vacuum.h"
 #include "nodes/execnodes.h"
 #include "nodes/pathnodes.h"
 
@@ -149,8 +148,13 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate,
 typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
                                                                                          struct ExplainState *es);
 
+typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
+                                                                         HeapTuple *rows, int targrows,
+                                                                         double *totalrows,
+                                                                         double *totaldeadrows);
+
 typedef bool (*AnalyzeForeignTable_function) (Relation relation,
-                                                                                         AcquireSampleRowsFunc *func,
+                                                                                         AcquireSampleRowsFunc * func,
                                                                                          BlockNumber *totalpages);
 
 typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
index cf05701c032fee4135bd0a5b73abc968326caf11..eae5da8b6ed9936c94834f475804975871fa4155 100644 (file)
@@ -22,7 +22,6 @@ AclItem
 AclMaskHow
 AclMode
 AclResult
-AcquireSampleRowsFunc
 ActionList
 ActiveSnapshotElt
 AddForeignUpdateTargets_function
@@ -2530,8 +2529,6 @@ ScalarIOData
 ScalarItem
 ScalarMCVItem
 Scan
-ScanAnalyzeNextBlockFunc
-ScanAnalyzeNextTupleFunc
 ScanDirection
 ScanKey
 ScanKeyData