Report progress of ANALYZE commands
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 15 Jan 2020 14:02:09 +0000 (11:02 -0300)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Wed, 15 Jan 2020 14:14:39 +0000 (11:14 -0300)
This uses the progress reporting infrastructure added by c16dc1aca5e0,
adding support for ANALYZE.

Co-authored-by: Álvaro Herrera <alvherre@alvh.no-ip.org>
Co-authored-by: Tatsuro Yamada <tatsuro.yamada.tf@nttcom.co.jp>
Reviewed-by: Julien Rouhaud, Robert Haas, Anthony Nowocien, Kyotaro Horiguchi,
Vignesh C, Amit Langote

doc/src/sgml/monitoring.sgml
src/backend/catalog/system_views.sql
src/backend/commands/analyze.c
src/backend/statistics/extended_stats.c
src/backend/utils/adt/pgstatfuncs.c
src/backend/utils/misc/sampling.c
src/include/catalog/catversion.h
src/include/commands/progress.h
src/include/pgstat.h
src/include/utils/sampling.h
src/test/regress/expected/rules.out

index 136778cb8ab278fab19b86b5b02743687dc4c528..0bfd6151c4755b89e02a251b601b8fa5d7f334c6 100644 (file)
@@ -344,6 +344,14 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       </entry>
      </row>
 
+     <row>
+      <entry><structname>pg_stat_progress_analyze</structname><indexterm><primary>pg_stat_progress_analyze</primary></indexterm></entry>
+      <entry>One row for each backend (including autovacuum worker processes) running
+       <command>ANALYZE</command>, showing current progress.
+       See <xref linkend='analyze-progress-reporting'/>.
+      </entry>
+     </row>
+
      <row>
       <entry><structname>pg_stat_progress_create_index</structname><indexterm><primary>pg_stat_progress_create_index</primary></indexterm></entry>
       <entry>One row for each backend running <command>CREATE INDEX</command> or <command>REINDEX</command>, showing
@@ -3505,11 +3513,185 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
   <para>
    <productname>PostgreSQL</productname> has the ability to report the progress of
    certain commands during command execution.  Currently, the only commands
-   which support progress reporting are <command>CREATE INDEX</command>,
-   <command>VACUUM</command> and
-   <command>CLUSTER</command>. This may be expanded in the future.
+   which support progress reporting are <command>ANALYZE</command>,
+   <command>CLUSTER</command>,
+   <command>CREATE INDEX</command>, and <command>VACUUM</command>.
+   This may be expanded in the future.
+  </para>
+
+ <sect2 id="analyze-progress-reporting">
+  <title>ANALYZE Progress Reporting</title>
+
+  <para>
+   Whenever <command>ANALYZE</command> is running, the
+   <structname>pg_stat_progress_analyze</structname> view will contain a
+   row for each backend that is currently running that command.  The tables
+   below describe the information that will be reported and provide
+   information about how to interpret it.
   </para>
 
+  <table id="pg-stat-progress-analyze-view" xreflabel="pg_stat_progress_analyze">
+   <title><structname>pg_stat_progress_analyze</structname> View</title>
+   <tgroup cols="3">
+    <thead>
+    <row>
+      <entry>Column</entry>
+      <entry>Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+   <tbody>
+    <row>
+     <entry><structfield>pid</structfield></entry>
+     <entry><type>integer</type></entry>
+     <entry>Process ID of backend.</entry>
+    </row>
+    <row>
+     <entry><structfield>datid</structfield></entry>
+     <entry><type>oid</type></entry>
+     <entry>OID of the database to which this backend is connected.</entry>
+    </row>
+    <row>
+     <entry><structfield>datname</structfield></entry>
+     <entry><type>name</type></entry>
+     <entry>Name of the database to which this backend is connected.</entry>
+    </row>
+    <row>
+     <entry><structfield>relid</structfield></entry>
+     <entry><type>oid</type></entry>
+     <entry>OID of the table being analyzed.</entry>
+    </row>
+    <row>
+     <entry><structfield>phase</structfield></entry>
+     <entry><type>text</type></entry>
+     <entry>Current processing phase. See <xref linkend="analyze-phases" />.</entry>
+    </row>
+    <row>
+     <entry><structfield>sample_blks_total</structfield></entry>
+     <entry><type>bigint</type></entry>
+     <entry>
+       Total number of heap blocks that will be sampled.
+     </entry>
+    </row>
+    <row>
+     <entry><structfield>sample_blks_scanned</structfield></entry>
+     <entry><type>bigint</type></entry>
+     <entry>
+       Number of heap blocks scanned.
+     </entry>
+    </row>
+    <row>
+     <entry><structfield>ext_stats_total</structfield></entry>
+     <entry><type>bigint</type></entry>
+     <entry>
+       Number of extended statistics.
+     </entry>
+    </row>
+    <row>
+     <entry><structfield>ext_stats_computed</structfield></entry>
+     <entry><type>bigint</type></entry>
+     <entry>
+       Number of extended statistics computed.  This counter only advances when
+       the phase is <literal>computing extended statistics</literal>.
+     </entry>
+    </row>
+    <row>
+     <entry><structfield>child_tables_total</structfield></entry>
+     <entry><type>bigint</type></entry>
+     <entry>
+       Number of child tables.
+     </entry>
+    </row>
+    <row>
+     <entry><structfield>child_tables_done</structfield></entry>
+     <entry><type>bigint</type></entry>
+     <entry>
+       Number of child tables scanned.  This counter only advances when the phase
+       is <literal>acquiring inherited sample rows</literal>.
+     </entry>
+    </row>
+    <row>
+     <entry><structfield>current_child_table_relid</structfield></entry>
+     <entry><type>oid</type></entry>
+     <entry>OID of the child table currently being scanned. This field is only valid when
+       the phase is <literal>acquiring inherited sample rows</literal>.
+     </entry>
+    </row>
+   </tbody>
+   </tgroup>
+  </table>
+
+  <table id="analyze-phases">
+   <title>ANALYZE phases</title>
+   <tgroup cols="2">
+    <thead>
+    <row>
+      <entry>Phase</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+   <tbody>
+    <row>
+     <entry><literal>initializing</literal></entry>
+     <entry>
+       The command is preparing to begin scanning the heap.  This phase is
+       expected to be very brief.
+     </entry>
+    </row>
+    <row>
+     <entry><literal>acquiring sample rows</literal></entry>
+     <entry>
+       The command is currently scanning the table given by
+       <structfield>relid</structfield> to obtain sample rows.
+     </entry>
+    </row>
+    <row>
+     <entry><literal>acquiring inherited sample rows</literal></entry>
+     <entry>
+       The command is currently scanning child tables to obtain sample rows. Columns
+       <structfield>child_tables_total</structfield>,
+       <structfield>child_tables_done</structfield>, and
+       <structfield>current_child_table_relid</structfield> contain the progress
+       information for this phase.
+     </entry>
+    </row>
+    <row>
+     <entry><literal>computing statistics</literal></entry>
+     <entry>
+       The command is computing statistics from the sample rows obtained during
+       the table scan.
+     </entry>
+    </row>
+    <row>
+     <entry><literal>computing extended statistics</literal></entry>
+     <entry>
+       The command is computing extended statistics from the sample rows obtained
+       during the table scan.
+     </entry>
+    </row>
+    <row>
+     <entry><literal>finalizing analyze</literal></entry>
+     <entry>
+       The command is updating <structname>pg_class</structname>. When this phase is completed,
+       <command>ANALYZE</command> will end.
+     </entry>
+    </row>
+   </tbody>
+   </tgroup>
+  </table>
+
+  <note>
+   <para>
+    Note that when <command>ANALYZE</command> is run on a partitioned table,
+    all of its partitions are also recursively analyzed, as mentioned in
+    <xref linkend="sql-analyze"/>.  In that case, <command>ANALYZE</command>
+    progress is reported first for the parent table, whereby its inheritance
+    statistics are collected, followed by that for each partition.
+   </para>
+  </note>
+ </sect2>
+
  <sect2 id="create-index-progress-reporting">
   <title>CREATE INDEX Progress Reporting</title>
 
index 773edf85e78c7a9322e3e222d930d1364d0b3445..b3e82de71e643ea1a2fedf5eada1a5dfbbd5d762 100644 (file)
@@ -957,6 +957,27 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_progress_analyze AS  -- one row per backend currently reporting ANALYZE progress
+    SELECT
+        S.pid AS pid, S.datid AS datid, D.datname AS datname,
+        CAST(S.relid AS oid) AS relid,  -- OID of the table being analyzed
+        CASE S.param1 WHEN 0 THEN 'initializing'  -- param1 is the phase code; 0 has no PROGRESS_ANALYZE_PHASE_* macro
+                      WHEN 1 THEN 'acquiring sample rows'  -- PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS
+                      WHEN 2 THEN 'acquiring inherited sample rows'  -- PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH
+                      WHEN 3 THEN 'computing statistics'  -- PROGRESS_ANALYZE_PHASE_COMPUTE_STATS
+                      WHEN 4 THEN 'computing extended statistics'  -- PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS
+                      WHEN 5 THEN 'finalizing analyze'  -- PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE
+                      END AS phase,  -- no ELSE branch: any other code yields NULL
+        S.param2 AS sample_blks_total,  -- PROGRESS_ANALYZE_BLOCKS_TOTAL
+        S.param3 AS sample_blks_scanned,  -- PROGRESS_ANALYZE_BLOCKS_DONE
+        S.param4 AS ext_stats_total,  -- PROGRESS_ANALYZE_EXT_STATS_TOTAL
+        S.param5 AS ext_stats_computed,  -- PROGRESS_ANALYZE_EXT_STATS_COMPUTED
+        S.param6 AS child_tables_total,  -- PROGRESS_ANALYZE_CHILD_TABLES_TOTAL
+        S.param7 AS child_tables_done,  -- PROGRESS_ANALYZE_CHILD_TABLES_DONE
+        CAST(S.param8 AS oid) AS current_child_table_relid  -- PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID
+    FROM pg_stat_get_progress_info('ANALYZE') AS S
+        LEFT JOIN pg_database D ON S.datid = D.oid;  -- LEFT JOIN: a row is kept even if datid matches no pg_database entry
+
 CREATE VIEW pg_stat_progress_vacuum AS
     SELECT
         S.pid AS pid, S.datid AS datid, D.datname AS datname,
index 53b11d7f09b44e62412e18ee380a57830457da03..c4420ddd7f2fc29c7b31e2069d1fd3dd752a9758 100644 (file)
@@ -35,6 +35,7 @@
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_statistic_ext.h"
 #include "commands/dbcommands.h"
+#include "commands/progress.h"
 #include "commands/tablecmds.h"
 #include "commands/vacuum.h"
 #include "executor/executor.h"
@@ -251,6 +252,8 @@ analyze_rel(Oid relid, RangeVar *relation,
    LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
    LWLockRelease(ProcArrayLock);
+   pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE,
+                                 RelationGetRelid(onerel));
 
    /*
     * Do the normal non-recursive ANALYZE.  We can skip this for partitioned
@@ -275,6 +278,8 @@ analyze_rel(Oid relid, RangeVar *relation,
     */
    relation_close(onerel, NoLock);
 
+   pgstat_progress_end_command();
+
    /*
     * Reset my PGXACT flag.  Note: we need this here, and not in vacuum_rel,
     * because the vacuum flag is cleared by the end-of-xact code.
@@ -506,6 +511,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
     * Acquire the sample rows
     */
    rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+   pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+                                inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH :
+                                PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS);
    if (inh)
        numrows = acquire_inherited_sample_rows(onerel, elevel,
                                                rows, targrows,
@@ -526,6 +534,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
        MemoryContext col_context,
                    old_context;
 
+       pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+                                    PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);
+
        col_context = AllocSetContextCreate(anl_context,
                                            "Analyze Column",
                                            ALLOCSET_DEFAULT_SIZES);
@@ -596,6 +607,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
                                       attr_cnt, vacattrstats);
    }
 
+   pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+                                PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);
+
    /*
     * Update pages/tuples stats in pg_class ... but not if we're doing
     * inherited stats.
@@ -1034,6 +1048,8 @@ acquire_sample_rows(Relation onerel, int elevel,
    ReservoirStateData rstate;
    TupleTableSlot *slot;
    TableScanDesc scan;
+   BlockNumber nblocks;
+   BlockNumber blksdone = 0;
 
    Assert(targrows > 0);
 
@@ -1043,7 +1059,12 @@ acquire_sample_rows(Relation onerel, int elevel,
    OldestXmin = GetOldestXmin(onerel, PROCARRAY_FLAGS_VACUUM);
 
    /* Prepare for sampling block numbers */
-   BlockSampler_Init(&bs, totalblocks, targrows, random());
+   nblocks = BlockSampler_Init(&bs, totalblocks, targrows, random());
+
+   /* Report sampling block numbers */
+   pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL,
+                                nblocks);
+
    /* Prepare for sampling rows */
    reservoir_init_selection_state(&rstate, targrows);
 
@@ -1104,6 +1125,9 @@ acquire_sample_rows(Relation onerel, int elevel,
 
            samplerows += 1;
        }
+
+       pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE,
+                                    ++blksdone);
    }
 
    ExecDropSingleTupleTableSlot(slot);
@@ -1332,6 +1356,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
     * rels have radically different free-space percentages, but it's not
     * clear that it's worth working harder.)
     */
+   pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL,
+                                nrels);
    numrows = 0;
    *totalrows = 0;
    *totaldeadrows = 0;
@@ -1341,6 +1367,9 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
        AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
        double      childblocks = relblocks[i];
 
+       pgstat_progress_update_param(PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID,
+                                    RelationGetRelid(childrel));
+
        if (childblocks > 0)
        {
            int         childtargrows;
@@ -1396,6 +1425,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
         * pointers to their TOAST tables in the sampled rows.
         */
        table_close(childrel, NoLock);
+       pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_DONE,
+                                    i + 1);
    }
 
    return numrows;
index d17b8d9b1f225efc46ad8bcb9f51e49de0cc9e4d..03e69d057ff6be312c56c6be62cdb8b268898400 100644 (file)
 #include "catalog/pg_collation.h"
 #include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_statistic_ext_data.h"
+#include "commands/progress.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/optimizer.h"
+#include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "statistics/extended_stats_internal.h"
 #include "statistics/statistics.h"
@@ -92,6 +94,7 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
    List       *stats;
    MemoryContext cxt;
    MemoryContext oldcxt;
+   int64       ext_cnt;
 
    cxt = AllocSetContextCreate(CurrentMemoryContext,
                                "BuildRelationExtStatistics",
@@ -101,6 +104,22 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
    pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
    stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
 
+   /* report this phase */
+   if (stats != NIL)
+   {
+       const int   index[] = {
+           PROGRESS_ANALYZE_PHASE,
+           PROGRESS_ANALYZE_EXT_STATS_TOTAL
+       };
+       const int64 val[] = {
+           PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS,
+           list_length(stats)
+       };
+
+       pgstat_progress_update_multi_param(2, index, val);
+   }
+
+   ext_cnt = 0;
    foreach(lc, stats)
    {
        StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
@@ -165,6 +184,10 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 
        /* store the statistics in the catalog */
        statext_store(stat->statOid, ndistinct, dependencies, mcv, stats);
+
+       /* for reporting progress */
+       pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
+                                    ++ext_cnt);
    }
 
    table_close(pg_stext, RowExclusiveLock);
index 3dbf6048acbc5e333e1966a8b3e68fb78e781875..74f899f24dfc8969231361ffe1ce83f62cd1973b 100644 (file)
@@ -468,6 +468,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS)
    /* Translate command name into command type code. */
    if (pg_strcasecmp(cmd, "VACUUM") == 0)
        cmdtype = PROGRESS_COMMAND_VACUUM;
+   else if (pg_strcasecmp(cmd, "ANALYZE") == 0)
+       cmdtype = PROGRESS_COMMAND_ANALYZE;
    else if (pg_strcasecmp(cmd, "CLUSTER") == 0)
        cmdtype = PROGRESS_COMMAND_CLUSTER;
    else if (pg_strcasecmp(cmd, "CREATE INDEX") == 0)
index daf75ee5d97ebf5a30cc13768391964b52fa6c3f..361c15614e7cda7b01fbd60e5d43bf3e1c410e7f 100644 (file)
  * Since we know the total number of blocks in advance, we can use the
  * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's
  * algorithm.
+ *
+ * Returns the number of blocks that BlockSampler_Next will return.
  */
-void
+BlockNumber
 BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
                  long randseed)
 {
@@ -48,6 +50,8 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
    bs->m = 0;                  /* blocks selected so far */
 
    sampler_random_init_state(randseed, bs->randstate);
+
+   return Min(bs->n, bs->N);
 }
 
 bool
index 40c1e8e0b85cff4843f0c78278c657696736025c..6ac57be6a08bf820eefd3be57df528fa5c730326 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202001091
+#define CATALOG_VERSION_NO 202001151
 
 #endif
index 3f91b3bf525e7507d2d1c4e734417b2068e5fad1..12e9d3d42f81debee44b152ad3bbafe9e11e9163 100644 (file)
 #define PROGRESS_VACUUM_PHASE_TRUNCATE         5
 #define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP        6
 
+/* Progress parameters for analyze (index N is exposed as column param(N+1) by the pg_stat_progress_analyze view) */
+#define PROGRESS_ANALYZE_PHASE                     0
+#define PROGRESS_ANALYZE_BLOCKS_TOTAL              1
+#define PROGRESS_ANALYZE_BLOCKS_DONE               2
+#define PROGRESS_ANALYZE_EXT_STATS_TOTAL           3
+#define PROGRESS_ANALYZE_EXT_STATS_COMPUTED            4
+#define PROGRESS_ANALYZE_CHILD_TABLES_TOTAL            5
+#define PROGRESS_ANALYZE_CHILD_TABLES_DONE         6
+#define PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID 7
+
+/* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE); value 0 has no macro here and is shown as 'initializing' by the view */
+#define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS     1
+#define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH 2
+#define PROGRESS_ANALYZE_PHASE_COMPUTE_STATS           3
+#define PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS       4
+#define PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE            5
+
 /* Progress parameters for cluster */
 #define PROGRESS_CLUSTER_COMMAND               0
 #define PROGRESS_CLUSTER_PHASE                 1
index e5a5d025ba6387e95ca87d7a05052457e0f51031..36b530bc27f19c1e3c68e2977407ae3d805161b9 100644 (file)
@@ -956,6 +956,7 @@ typedef enum ProgressCommandType
 {
    PROGRESS_COMMAND_INVALID,
    PROGRESS_COMMAND_VACUUM,
+   PROGRESS_COMMAND_ANALYZE,
    PROGRESS_COMMAND_CLUSTER,
    PROGRESS_COMMAND_CREATE_INDEX
 } ProgressCommandType;
index a84914b968aebed28984f3fc8c5a23bd25c27ffb..74646846b2021a0c7eadfe78f9e3c39f3382b74d 100644 (file)
@@ -37,8 +37,8 @@ typedef struct
 
 typedef BlockSamplerData *BlockSampler;
 
-extern void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
-                             int samplesize, long randseed);
+extern BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
+                                    int samplesize, long randseed);
 extern bool BlockSampler_HasMore(BlockSampler bs);
 extern BlockNumber BlockSampler_Next(BlockSampler bs);
 
index 62eaf90a0f29ea1f5ad561bbdd0bfa7709402a41..70e1e2f78d12c7fe308907b6d20a766e5461dd05 100644 (file)
@@ -1852,6 +1852,28 @@ pg_stat_gssapi| SELECT s.pid,
     s.gss_enc AS encrypted
    FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, sslcompression, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc)
   WHERE (s.client_port IS NOT NULL);
+pg_stat_progress_analyze| SELECT s.pid,
+    s.datid,
+    d.datname,
+    s.relid,
+        CASE s.param1
+            WHEN 0 THEN 'initializing'::text
+            WHEN 1 THEN 'acquiring sample rows'::text
+            WHEN 2 THEN 'acquiring inherited sample rows'::text
+            WHEN 3 THEN 'computing statistics'::text
+            WHEN 4 THEN 'computing extended statistics'::text
+            WHEN 5 THEN 'finalizing analyze'::text
+            ELSE NULL::text
+        END AS phase,
+    s.param2 AS sample_blks_total,
+    s.param3 AS sample_blks_scanned,
+    s.param4 AS ext_stats_total,
+    s.param5 AS ext_stats_computed,
+    s.param6 AS child_tables_total,
+    s.param7 AS child_tables_done,
+    (s.param8)::oid AS current_child_table_relid
+   FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
+     LEFT JOIN pg_database d ON ((s.datid = d.oid)));
 pg_stat_progress_cluster| SELECT s.pid,
     s.datid,
     d.datname,