revert: Generalize relation analyze in table AM interface

author Alexander Korotkov <akorotkov@postgresql.org>

Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)

committer Alexander Korotkov <akorotkov@postgresql.org>

Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)
author Alexander Korotkov <akorotkov@postgresql.org>
Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)
committer Alexander Korotkov <akorotkov@postgresql.org>
Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c

index 3428d80817cd620d43aed475fe6c156c33027578..6f8b1b79298f3a364b95b76f93bd7d3814f362dd 100644 (file)
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -1002,7 +1002,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
   * until heapam_scan_analyze_next_tuple() returns false.  That is until all the
   * items of the heap page are analyzed.
   */
-bool
+static bool
  heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
  {
     HeapScanDesc hscan = (HeapScanDesc) scan;
@@ -1026,17 +1026,7 @@ heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
     return true;
  }
  
-/*
- * Iterate over tuples in the block selected with
- * heapam_scan_analyze_next_block().  If a tuple that's suitable for sampling
- * is found, true is returned and a tuple is stored in `slot`.  When no more
- * tuples for sampling, false is returned and the pin and lock acquired by
- * heapam_scan_analyze_next_block() are released.
- *
- * *liverows and *deadrows are incremented according to the encountered
- * tuples.
- */
-bool
+static bool
  heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
                                double *liverows, double *deadrows,
                                TupleTableSlot *slot)
@@ -2593,18 +2583,6 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
     }
  }
  
-/*
- * heapap_analyze -- implementation of relation_analyze() for heap
- *                  table access method
- */
-static void
-heapam_analyze(Relation relation, AcquireSampleRowsFunc *func,
-              BlockNumber *totalpages, BufferAccessStrategy bstrategy)
-{
-   block_level_table_analyze(relation, func, totalpages, bstrategy,
-                             heapam_scan_analyze_next_block,
-                             heapam_scan_analyze_next_tuple);
-}
  
  /* ------------------------------------------------------------------------
   * Definition of the heap table access method.
@@ -2652,9 +2630,10 @@ static const TableAmRoutine heapam_methods = {
     .relation_copy_data = heapam_relation_copy_data,
     .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
     .relation_vacuum = heap_vacuum_rel,
+   .scan_analyze_next_block = heapam_scan_analyze_next_block,
+   .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
     .index_build_range_scan = heapam_index_build_range_scan,
     .index_validate_scan = heapam_index_validate_scan,
-   .relation_analyze = heapam_analyze,
  
     .relation_size = table_block_relation_size,
     .relation_needs_toast_table = heapam_relation_needs_toast_table,
diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c

index 55b8caeadf2c8b2d3b350d9661954e1f31852299..ce637a5a5d9931163bd1126b1826129e143aab07 100644 (file)
--- a/src/backend/access/table/tableamapi.c
+++ b/src/backend/access/table/tableamapi.c
@@ -81,6 +81,8 @@ GetTableAmRoutine(Oid amhandler)
     Assert(routine->relation_copy_data != NULL);
     Assert(routine->relation_copy_for_cluster != NULL);
     Assert(routine->relation_vacuum != NULL);
+   Assert(routine->scan_analyze_next_block != NULL);
+   Assert(routine->scan_analyze_next_tuple != NULL);
     Assert(routine->index_build_range_scan != NULL);
     Assert(routine->index_validate_scan != NULL);
  
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c

index 516b43b0e341d2f37b7338c72be43fd0ada5954b..7d2cd24997292a0865cee0d51b7762e98d96590a 100644 (file)
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -17,7 +17,6 @@
  #include <math.h>
  
  #include "access/detoast.h"
-#include "access/heapam.h"
  #include "access/genam.h"
  #include "access/multixact.h"
  #include "access/relation.h"
@@ -76,8 +75,6 @@ int           default_statistics_target = 100;
  /* A few variables that don't seem worth passing around as parameters */
  static MemoryContext anl_context = NULL;
  static BufferAccessStrategy vac_strategy;
-static ScanAnalyzeNextBlockFunc scan_analyze_next_block;
-static ScanAnalyzeNextTupleFunc scan_analyze_next_tuple;
  
  
  static void do_analyze_rel(Relation onerel,
@@ -90,6 +87,9 @@ static void compute_index_stats(Relation onerel, double totalrows,
                                 MemoryContext col_context);
  static VacAttrStats *examine_attribute(Relation onerel, int attnum,
                                        Node *index_expr);
+static int acquire_sample_rows(Relation onerel, int elevel,
+                               HeapTuple *rows, int targrows,
+                               double *totalrows, double *totaldeadrows);
  static int compare_rows(const void *a, const void *b, void *arg);
  static int acquire_inherited_sample_rows(Relation onerel, int elevel,
                                           HeapTuple *rows, int targrows,
@@ -190,12 +190,10 @@ analyze_rel(Oid relid, RangeVar *relation,
     if (onerel->rd_rel->relkind == RELKIND_RELATION ||
         onerel->rd_rel->relkind == RELKIND_MATVIEW)
     {
-       /*
-        * Get row acquisition function, blocks and tuples iteration callbacks
-        * provided by table AM
-        */
-       table_relation_analyze(onerel, &acquirefunc,
-                              &relpages, vac_strategy);
+       /* Regular table, so we'll use the regular row acquisition function */
+       acquirefunc = acquire_sample_rows;
+       /* Also get regular table's size */
+       relpages = RelationGetNumberOfBlocks(onerel);
     }
     else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
     {
@@ -1119,17 +1117,15 @@ block_sampling_read_stream_next(ReadStream *stream,
  }
  
  /*
- * acquire_sample_rows -- acquire a random sample of rows from the
- * block-based relation
+ * acquire_sample_rows -- acquire a random sample of rows from the table
   *
   * Selected rows are returned in the caller-allocated array rows[], which
   * must have at least targrows entries.
   * The actual number of rows selected is returned as the function result.
- * We also estimate the total numbers of live and dead rows in the relation,
+ * We also estimate the total numbers of live and dead rows in the table,
   * and return them into *totalrows and *totaldeadrows, respectively.
   *
- * The returned list of tuples is in order by physical position in the
- * relation.
+ * The returned list of tuples is in order by physical position in the table.
   * (We will rely on this later to derive correlation estimates.)
   *
   * As of May 2004 we use a new two-stage method:  Stage one selects up
@@ -1151,7 +1147,7 @@ block_sampling_read_stream_next(ReadStream *stream,
   * look at a statistically unbiased set of blocks, we should get
   * unbiased estimates of the average numbers of live and dead rows per
   * block.  The previous sampling method put too much credence in the row
- * density near the start of the relation.
+ * density near the start of the table.
   */
  static int
  acquire_sample_rows(Relation onerel, int elevel,
@@ -1204,11 +1200,11 @@ acquire_sample_rows(Relation onerel, int elevel,
                                         0);
  
     /* Outer loop over blocks to sample */
-   while (scan_analyze_next_block(scan, stream))
+   while (table_scan_analyze_next_block(scan, stream))
     {
         vacuum_delay_point();
  
-       while (scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+       while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
         {
             /*
              * The first targrows sample rows are simply copied into the
@@ -1331,25 +1327,6 @@ compare_rows(const void *a, const void *b, void *arg)
     return 0;
  }
  
-/*
- * block_level_table_analyze -- implementation of relation_analyze() for
- *                             block-level table access methods
- */
-void
-block_level_table_analyze(Relation relation,
-                         AcquireSampleRowsFunc *func,
-                         BlockNumber *totalpages,
-                         BufferAccessStrategy bstrategy,
-                         ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
-                         ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb)
-{
-   *func = acquire_sample_rows;
-   *totalpages = RelationGetNumberOfBlocks(relation);
-   vac_strategy = bstrategy;
-   scan_analyze_next_block = scan_analyze_next_block_cb;
-   scan_analyze_next_tuple = scan_analyze_next_tuple_cb;
-}
-
  
  /*
   * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
@@ -1439,9 +1416,9 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
         if (childrel->rd_rel->relkind == RELKIND_RELATION ||
             childrel->rd_rel->relkind == RELKIND_MATVIEW)
         {
-           /* Use row acquisition function provided by table AM */
-           table_relation_analyze(childrel, &acquirefunc,
-                                  &relpages, vac_strategy);
+           /* Regular table, so use the regular row acquisition function */
+           acquirefunc = acquire_sample_rows;
+           relpages = RelationGetNumberOfBlocks(childrel);
         }
         else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
         {
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h

index 735662dc9df67e05e113f29130ef9fc75f0b3dd8..c47a5045cef1396f85476f58d69255ce23233b87 100644 (file)
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -409,14 +409,6 @@ extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
  extern bool HeapTupleIsSurelyDead(HeapTuple htup,
                                   struct GlobalVisState *vistest);
  
-/* in heap/heapam_handler.c*/
-extern bool heapam_scan_analyze_next_block(TableScanDesc scan,
-                                          ReadStream *stream);
-extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
-                                          TransactionId OldestXmin,
-                                          double *liverows, double *deadrows,
-                                          TupleTableSlot *slot);
-
  /*
   * To avoid leaking too much knowledge about reorderbuffer implementation
   * details this is implemented in reorderbuffer.c not heapam_visibility.c
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h

index 1cc395317e52626ef122b3c48621788fe0407664..8e583b45cd54791f2324606ca280871957fafa09 100644 (file)
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -20,8 +20,8 @@
  #include "access/relscan.h"
  #include "access/sdir.h"
  #include "access/xact.h"
-#include "commands/vacuum.h"
  #include "executor/tuptable.h"
+#include "storage/read_stream.h"
  #include "utils/rel.h"
  #include "utils/snapshot.h"
  
@@ -655,6 +655,50 @@ typedef struct TableAmRoutine
                                     struct VacuumParams *params,
                                     BufferAccessStrategy bstrategy);
  
+   /*
+    * Prepare to analyze the next block in the read stream.  Returns false if
+    * the stream is exhausted and true otherwise. The scan must have been
+    * started with SO_TYPE_ANALYZE option.
+    *
+    * This routine holds a buffer pin and lock on the heap page.  They are
+    * held until heapam_scan_analyze_next_tuple() returns false.  That is
+    * until all the items of the heap page are analyzed.
+    */
+
+   /*
+    * Prepare to analyze block `blockno` of `scan`. The scan has been started
+    * with table_beginscan_analyze().  See also
+    * table_scan_analyze_next_block().
+    *
+    * The callback may acquire resources like locks that are held until
+    * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
+    * to hold a lock until all tuples on a block have been analyzed by
+    * scan_analyze_next_tuple.
+    *
+    * The callback can return false if the block is not suitable for
+    * sampling, e.g. because it's a metapage that could never contain tuples.
+    *
+    * XXX: This obviously is primarily suited for block-based AMs. It's not
+    * clear what a good interface for non block based AMs would be, so there
+    * isn't one yet.
+    */
+   bool        (*scan_analyze_next_block) (TableScanDesc scan,
+                                           ReadStream *stream);
+
+   /*
+    * See table_scan_analyze_next_tuple().
+    *
+    * Not every AM might have a meaningful concept of dead rows, in which
+    * case it's OK to not increment *deadrows - but note that that may
+    * influence autovacuum scheduling (see comment for relation_vacuum
+    * callback).
+    */
+   bool        (*scan_analyze_next_tuple) (TableScanDesc scan,
+                                           TransactionId OldestXmin,
+                                           double *liverows,
+                                           double *deadrows,
+                                           TupleTableSlot *slot);
+
     /* see table_index_build_range_scan for reference about parameters */
     double      (*index_build_range_scan) (Relation table_rel,
                                            Relation index_rel,
@@ -675,12 +719,6 @@ typedef struct TableAmRoutine
                                         Snapshot snapshot,
                                         struct ValidateIndexState *state);
  
-   /* See table_relation_analyze() */
-   void        (*relation_analyze) (Relation relation,
-                                    AcquireSampleRowsFunc *func,
-                                    BlockNumber *totalpages,
-                                    BufferAccessStrategy bstrategy);
-
  
     /* ------------------------------------------------------------------------
      * Miscellaneous functions.
@@ -1682,6 +1720,40 @@ table_relation_vacuum(Relation rel, struct VacuumParams *params,
     rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
  }
  
+/*
+ * Prepare to analyze the next block in the read stream. The scan needs to
+ * have been  started with table_beginscan_analyze().  Note that this routine
+ * might acquire resources like locks that are held until
+ * table_scan_analyze_next_tuple() returns false.
+ *
+ * Returns false if block is unsuitable for sampling, true otherwise.
+ */
+static inline bool
+table_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
+{
+   return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, stream);
+}
+
+/*
+ * Iterate over tuples in the block selected with
+ * table_scan_analyze_next_block() (which needs to have returned true, and
+ * this routine may not have returned false for the same block before). If a
+ * tuple that's suitable for sampling is found, true is returned and a tuple
+ * is stored in `slot`.
+ *
+ * *liverows and *deadrows are incremented according to the encountered
+ * tuples.
+ */
+static inline bool
+table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
+                             double *liverows, double *deadrows,
+                             TupleTableSlot *slot)
+{
+   return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
+                                                           liverows, deadrows,
+                                                           slot);
+}
+
  /*
   * table_index_build_scan - scan the table to find tuples to be indexed
   *
@@ -1787,21 +1859,6 @@ table_index_validate_scan(Relation table_rel,
                                                state);
  }
  
-/*
- * table_relation_analyze - fill the infromation for a sampling statistics
- *                         acquisition
- *
- * The pointer to a function that will collect sample rows from the table
- * should be stored to `*func`, plus the estimated size of the table in pages
- * should br stored to `*totalpages`.
- */
-static inline void
-table_relation_analyze(Relation relation, AcquireSampleRowsFunc *func,
-                      BlockNumber *totalpages, BufferAccessStrategy bstrategy)
-{
-   relation->rd_tableam->relation_analyze(relation, func,
-                                          totalpages, bstrategy);
-}
  
  /* ----------------------------------------------------------------------------
   * Miscellaneous functionality
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h

index 12a03abb75a56ec9a442666c907bf3999e5705f7..759f9a87d38e559c5f718b65e1c0f89dbbc16693 100644 (file)
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -21,11 +21,9 @@
  #include "catalog/pg_class.h"
  #include "catalog/pg_statistic.h"
  #include "catalog/pg_type.h"
-#include "executor/tuptable.h"
  #include "parser/parse_node.h"
  #include "storage/buf.h"
  #include "storage/lock.h"
-#include "storage/read_stream.h"
  #include "utils/relcache.h"
  
  /*
@@ -178,21 +176,6 @@ typedef struct VacAttrStats
     int         rowstride;
  } VacAttrStats;
  
-/*
- * AcquireSampleRowsFunc - a function for the sampling statistics collection.
- *
- * A random sample of up to `targrows` rows should be collected from the
- * table and stored into the caller-provided `rows` array.  The actual number
- * of rows collected must be returned.  In addition, a function should store
- * estimates of the total numbers of live and dead rows in the table into the
- * output parameters `*totalrows` and `*totaldeadrows1.  (Set `*totaldeadrows`
- * to zero if the storage does not have any concept of dead rows.)
- */
-typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
-                                     HeapTuple *rows, int targrows,
-                                     double *totalrows,
-                                     double *totaldeadrows);
-
  /* flag bits for VacuumParams->options */
  #define VACOPT_VACUUM 0x01     /* do VACUUM */
  #define VACOPT_ANALYZE 0x02        /* do ANALYZE */
@@ -392,61 +375,9 @@ extern void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs,
  extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
  
  /* in commands/analyze.c */
-
-struct TableScanDescData;
-
-
-/*
- * A callback to prepare to analyze block from `stream` of `scan`. The scan
- * has been started with table_beginscan_analyze().
- *
- * The callback may acquire resources like locks that are held until
- * ScanAnalyzeNextTupleFunc returns false. In some cases it could be
- * useful to hold a lock until all tuples in a block have been analyzed by
- * ScanAnalyzeNextTupleFunc.
- *
- * The callback can return false if the block is not suitable for
- * sampling, e.g. because it's a metapage that could never contain tuples.
- *
- * This is primarily suited for block-based AMs. It's not clear what a
- * good interface for non block-based AMs would be, so there isn't one
- * yet and sampling using a custom implementation of acquire_sample_rows
- * may be preferred.
- */
-typedef bool (*ScanAnalyzeNextBlockFunc) (struct TableScanDescData *scan,
-                                         ReadStream *stream);
-
-/*
- * A callback to iterate over tuples in the block selected with
- * ScanAnalyzeNextBlockFunc (which needs to have returned true, and
- * this routine may not have returned false for the same block before). If
- * a tuple that's suitable for sampling is found, true is returned and a
- * tuple is stored in `slot`.
- *
- * *liverows and *deadrows are incremented according to the encountered
- * tuples.
- *
- * Not every AM might have a meaningful concept of dead rows, in which
- * case it's OK to not increment *deadrows - but note that that may
- * influence autovacuum scheduling (see comment for relation_vacuum
- * callback).
- */
-typedef bool (*ScanAnalyzeNextTupleFunc) (struct TableScanDescData *scan,
-                                         TransactionId OldestXmin,
-                                         double *liverows,
-                                         double *deadrows,
-                                         TupleTableSlot *slot);
-
  extern void analyze_rel(Oid relid, RangeVar *relation,
                         VacuumParams *params, List *va_cols, bool in_outer_xact,
                         BufferAccessStrategy bstrategy);
-extern void block_level_table_analyze(Relation relation,
-                                     AcquireSampleRowsFunc *func,
-                                     BlockNumber *totalpages,
-                                     BufferAccessStrategy bstrategy,
-                                     ScanAnalyzeNextBlockFunc scan_analyze_next_block_cb,
-                                     ScanAnalyzeNextTupleFunc scan_analyze_next_tuple_cb);
-
  extern bool std_typanalyze(VacAttrStats *stats);
  
  /* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h

index 0968e0a01ec6efa1cfbed26726da1915df970f33..7f0475d2fa76bca69514a2a7856dbdfdc6cc7b77 100644 (file)
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -13,7 +13,6 @@
  #define FDWAPI_H
  
  #include "access/parallel.h"
-#include "commands/vacuum.h"
  #include "nodes/execnodes.h"
  #include "nodes/pathnodes.h"
  
@@ -149,8 +148,13 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate,
  typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
                                               struct ExplainState *es);
  
+typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
+                                     HeapTuple *rows, int targrows,
+                                     double *totalrows,
+                                     double *totaldeadrows);
+
  typedef bool (*AnalyzeForeignTable_function) (Relation relation,
-                                             AcquireSampleRowsFunc *func,
+                                             AcquireSampleRowsFunc * func,
                                               BlockNumber *totalpages);
  
  typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index cf05701c032fee4135bd0a5b73abc968326caf11..eae5da8b6ed9936c94834f475804975871fa4155 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -22,7 +22,6 @@ AclItem
  AclMaskHow
  AclMode
  AclResult
-AcquireSampleRowsFunc
  ActionList
  ActiveSnapshotElt
  AddForeignUpdateTargets_function
@@ -2530,8 +2529,6 @@ ScalarIOData
  ScalarItem
  ScalarMCVItem
  Scan
-ScanAnalyzeNextBlockFunc
-ScanAnalyzeNextTupleFunc
  ScanDirection
  ScanKey
  ScanKeyData
author	Alexander Korotkov <akorotkov@postgresql.org>
	Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)
committer	Alexander Korotkov <akorotkov@postgresql.org>
	Tue, 16 Apr 2024 10:14:20 +0000 (13:14 +0300)
src/backend/access/heap/heapam_handler.c		patch \| blob \| blame \| history
src/backend/access/table/tableamapi.c		patch \| blob \| blame \| history
src/backend/commands/analyze.c		patch \| blob \| blame \| history
src/include/access/heapam.h		patch \| blob \| blame \| history
src/include/access/tableam.h		patch \| blob \| blame \| history
src/include/commands/vacuum.h		patch \| blob \| blame \| history
src/include/foreign/fdwapi.h		patch \| blob \| blame \| history
src/tools/pgindent/typedefs.list		patch \| blob \| blame \| history