{
"autovacuum_enabled",
"Enables autovacuum in this relation",
- RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+ RELOPT_KIND_HEAP | RELOPT_KIND_TOAST | RELOPT_KIND_PARTITIONED,
ShareUpdateExclusiveLock
},
true
{
"autovacuum_analyze_threshold",
"Minimum number of tuple inserts, updates or deletes prior to analyze",
- RELOPT_KIND_HEAP,
+ RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED,
ShareUpdateExclusiveLock
},
-1, 0, INT_MAX
{
"autovacuum_analyze_scale_factor",
"Number of tuple inserts, updates or deletes prior to analyze as a fraction of reltuples",
- RELOPT_KIND_HEAP,
+ RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED,
ShareUpdateExclusiveLock
},
-1, 0.0, 100.0
partitioned_table_reloptions(Datum reloptions, bool validate)
{
/*
- * There are no options for partitioned tables yet, but this is able to do
- * some validation.
+ * autovacuum_enabled, autovacuum_analyze_threshold and
+ * autovacuum_analyze_scale_factor are supported for partitioned tables.
*/
- return (bytea *) build_reloptions(reloptions, validate,
- RELOPT_KIND_PARTITIONED,
- 0, NULL, 0);
+
+ return default_reloptions(reloptions, validate, RELOPT_KIND_PARTITIONED);
}
/*
FROM pg_class C LEFT JOIN
pg_index I ON C.oid = I.indrelid
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
- WHERE C.relkind IN ('r', 't', 'm')
+ WHERE C.relkind IN ('r', 't', 'm', 'p')
GROUP BY C.oid, N.nspname, C.relname;
CREATE VIEW pg_stat_xact_all_tables AS
FROM pg_class C LEFT JOIN
pg_index I ON C.oid = I.indrelid
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
- WHERE C.relkind IN ('r', 't', 'm')
+ WHERE C.relkind IN ('r', 't', 'm', 'p')
GROUP BY C.oid, N.nspname, C.relname;
CREATE VIEW pg_stat_sys_tables AS
PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);
/*
- * Update pages/tuples stats in pg_class, and report ANALYZE to the stats
- * collector ... but not if we're doing inherited stats.
+ * Update pages/tuples stats in pg_class ... but not if we're doing
+ * inherited stats.
*
* We assume that VACUUM hasn't set pg_class.reltuples already, even
* during a VACUUM ANALYZE. Although VACUUM often updates pg_class,
InvalidMultiXactId,
in_outer_xact);
}
+ }
- /*
- * Now report ANALYZE to the stats collector.
- *
- * We deliberately don't report to the stats collector when doing
- * inherited stats, because the stats collector only tracks per-table
- * stats.
- *
- * Reset the changes_since_analyze counter only if we analyzed all
- * columns; otherwise, there is still work for auto-analyze to do.
- */
+ /*
+ * Now report ANALYZE to the stats collector. For regular tables, we do
+ * it only if not doing inherited stats. For partitioned tables, we only
+ * do it for inherited stats. (We're never called for not-inherited stats
+ * on partitioned tables anyway.)
+ *
+ * Reset the changes_since_analyze counter only if we analyzed all
+ * columns; otherwise, there is still work for auto-analyze to do.
+ */
+ if (!inh || onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
pgstat_report_analyze(onerel, totalrows, totaldeadrows,
(va_cols == NIL));
+
+ /*
+ * If this is a manual analyze of all columns of a permanent leaf
+ * partition, and not doing inherited stats, also let the collector know
+ * about the ancestor tables of this partition. Autovacuum does the
+ * equivalent of this at the start of its run, so there's no reason to do
+ * it there.
+ */
+ if (!inh && !IsAutoVacuumWorkerProcess() &&
+ (va_cols == NIL) &&
+ onerel->rd_rel->relispartition &&
+ onerel->rd_rel->relkind == RELKIND_RELATION &&
+ onerel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
+ {
+ pgstat_report_anl_ancestors(RelationGetRelid(onerel));
}
/*
#include "catalog/dependency.h"
#include "catalog/namespace.h"
#include "catalog/pg_database.h"
+#include "catalog/pg_inherits.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
#include "lib/ilist.h"
int effective_multixact_freeze_max_age;
bool did_vacuum = false;
bool found_concurrent_worker = false;
+ bool updated = false;
int i;
/*
/*
* Scan pg_class to determine which tables to vacuum.
*
- * We do this in two passes: on the first one we collect the list of plain
- * relations and materialized views, and on the second one we collect
- * TOAST tables. The reason for doing the second pass is that during it we
- * want to use the main relation's pg_class.reloptions entry if the TOAST
- * table does not have any, and we cannot obtain it unless we know
- * beforehand what's the main table OID.
+ * We do this in three passes: first we let the stats collector know about
+ * the partitioned table ancestors of all partitions that have recently
+ * acquired rows for analyze. This informs the second pass about the
+ * total tuple count in partitioning hierarchies.
+ *
+ * On the second pass, we collect the list of plain relations,
+ * materialized views and partitioned tables. On the third one we collect
+ * TOAST tables.
+ *
+ * The reason for doing the third pass is that during it we want to use
+ * the main relation's pg_class.reloptions entry if the TOAST table does
+ * not have any, and we cannot obtain it unless we know beforehand what's
+ * the main table OID.
*
* We need to check TOAST tables separately because in cases with short,
* wide tables there might be proportionally much more activity in the
relScan = table_beginscan_catalog(classRel, 0, NULL);
/*
- * On the first pass, we collect main tables to vacuum, and also the main
+ * First pass: before collecting the list of tables to vacuum, let the stats
+ * collector know about partitioned-table ancestors of each partition.
+ */
+ while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
+ {
+ Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+ Oid relid = classForm->oid;
+ PgStat_StatTabEntry *tabentry;
+
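+ /*
+ * Only consider leaf partitions that are not temporary.
+ * NOTE(review): unlogged partitions pass this test, whereas the manual
+ * ANALYZE path requires RELPERSISTENCE_PERMANENT -- confirm which is
+ * intended.
+ */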
+ if (!classForm->relispartition ||
+ classForm->relkind == RELKIND_PARTITIONED_TABLE ||
+ classForm->relpersistence == RELPERSISTENCE_TEMP)
+ continue;
+
+ /*
+ * No need to do this for partitions that haven't acquired any rows.
+ */
+ tabentry = pgstat_fetch_stat_tabentry(relid);
+ if (tabentry &&
+ tabentry->changes_since_analyze -
+ tabentry->changes_since_analyze_reported > 0)
+ {
+ pgstat_report_anl_ancestors(relid);
+ updated = true;
+ }
+ }
+
+ /* Acquire fresh stats for the next passes, if needed */
+ if (updated)
+ {
+ autovac_refresh_stats();
+ dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
+ shared = pgstat_fetch_stat_dbentry(InvalidOid);
+ }
+
+ /*
+ * On the second pass, we collect main tables to vacuum, and also the main
* table relid to TOAST relid mapping.
*/
while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
bool wraparound;
if (classForm->relkind != RELKIND_RELATION &&
- classForm->relkind != RELKIND_MATVIEW)
+ classForm->relkind != RELKIND_MATVIEW &&
+ classForm->relkind != RELKIND_PARTITIONED_TABLE)
continue;
relid = classForm->oid;
table_endscan(relScan);
- /* second pass: check TOAST tables */
+ /* third pass: check TOAST tables */
ScanKeyInit(&key,
Anum_pg_class_relkind,
BTEqualStrategyNumber, F_CHAREQ,
Assert(((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_RELATION ||
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW ||
+ ((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_PARTITIONED_TABLE ||
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE);
relopts = extractRelOptions(tup, pg_class_desc, NULL);
*/
if (PointerIsValid(tabentry) && AutoVacuumingActive())
{
- reltuples = classForm->reltuples;
+ if (classForm->relkind != RELKIND_PARTITIONED_TABLE)
+ {
+ reltuples = classForm->reltuples;
+ }
+ else
+ {
+ /*
+ * If the relation is a partitioned table, we must add up
+ * children's reltuples.
+ */
+ List *children;
+ ListCell *lc;
+
+ reltuples = 0;
+
+ /* Find all members of inheritance set taking AccessShareLock */
+ children = find_all_inheritors(relid, AccessShareLock, NULL);
+
+ foreach(lc, children)
+ {
+ Oid childOID = lfirst_oid(lc);
+ HeapTuple childtuple;
+ Form_pg_class childclass;
+
+ childtuple = SearchSysCache1(RELOID, ObjectIdGetDatum(childOID));
+ childclass = (Form_pg_class) GETSTRUCT(childtuple);
+
+ /* Skip a partitioned table and foreign partitions */
+ if (RELKIND_HAS_STORAGE(childclass->relkind))
+ {
+ /* Sum up the child's reltuples for its parent table */
+ reltuples += childclass->reltuples;
+ }
+ ReleaseSysCache(childtuple);
+ }
+
+ list_free(children);
+ }
vactuples = tabentry->n_dead_tuples;
instuples = tabentry->inserts_since_vacuum;
anltuples = tabentry->changes_since_analyze;
#include "access/transam.h"
#include "access/twophase_rmgr.h"
#include "access/xact.h"
+#include "catalog/partition.h"
#include "catalog/pg_database.h"
#include "catalog/pg_proc.h"
#include "common/ip.h"
static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
+static void pgstat_recv_anl_ancestors(PgStat_MsgAnlAncestors *msg, int len);
static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
static void pgstat_recv_wal(PgStat_MsgWal *msg, int len);
*
* Caller must provide new live- and dead-tuples estimates, as well as a
* flag indicating whether to reset the changes_since_analyze counter.
+ * As an exception, partitioned tables are supported too, but only their
+ * changes_since_analyze counter is maintained, since they don't hold any
+ * data themselves. This counter tells us whether a partitioned table
+ * needs autoanalyze or not.
* --------
*/
void
* be double-counted after commit. (This approach also ensures that the
* collector ends up with the right numbers if we abort instead of
* committing.)
+ *
+ * For partitioned tables, we don't report live and dead tuples, because
+ * such tables don't have any data.
*/
if (rel->pgstat_info != NULL)
{
PgStat_TableXactStatus *trans;
- for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ /* Partitioned tables hold no data, so report zero live and dead tuples */
+ livetuples = deadtuples = 0;
+ else
{
- livetuples -= trans->tuples_inserted - trans->tuples_deleted;
- deadtuples -= trans->tuples_updated + trans->tuples_deleted;
+ for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
+ {
+ livetuples -= trans->tuples_inserted - trans->tuples_deleted;
+ deadtuples -= trans->tuples_updated + trans->tuples_deleted;
+ }
+ /* count stuff inserted by already-aborted subxacts, too */
+ deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
+ /* Since ANALYZE's counts are estimates, we could have underflowed */
+ livetuples = Max(livetuples, 0);
+ deadtuples = Max(deadtuples, 0);
}
- /* count stuff inserted by already-aborted subxacts, too */
- deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
- /* Since ANALYZE's counts are estimates, we could have underflowed */
- livetuples = Max(livetuples, 0);
- deadtuples = Max(deadtuples, 0);
+
}
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
msg.m_live_tuples = livetuples;
msg.m_dead_tuples = deadtuples;
pgstat_send(&msg, sizeof(msg));
+
+}
+
+/*
+ * pgstat_report_anl_ancestors
+ *
+ * Send list of partitioned table ancestors of the given partition to the
+ * collector. The collector is in charge of propagating the analyze tuple
+ * counts from the partition to its ancestors. This is necessary so that
+ * other processes can decide whether to analyze the partitioned tables.
+ *
+ * relid is the OID of the partition; its ancestors are obtained from
+ * get_partition_ancestors(). A single message carries at most
+ * PGSTAT_NUM_ANCESTORENTRIES ancestor OIDs, so a longer list is split
+ * across several messages. Each message is sent with a length that covers
+ * only the filled-in part of m_ancestors[]. If the partition has no
+ * ancestors, no message is sent at all.
+ */
+void
+pgstat_report_anl_ancestors(Oid relid)
+{
+ PgStat_MsgAnlAncestors msg;
+ List *ancestors;
+ ListCell *lc;
+
+ pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANL_ANCESTORS);
+ msg.m_databaseid = MyDatabaseId;
+ msg.m_tableoid = relid;
+ msg.m_nancestors = 0;
+
+ ancestors = get_partition_ancestors(relid);
+ foreach(lc, ancestors)
+ {
+ Oid ancestor = lfirst_oid(lc);
+
+ msg.m_ancestors[msg.m_nancestors] = ancestor;
+ if (++msg.m_nancestors >= PGSTAT_NUM_ANCESTORENTRIES)
+ {
+ pgstat_send(&msg, offsetof(PgStat_MsgAnlAncestors, m_ancestors[0]) +
+ msg.m_nancestors * sizeof(Oid));
+ msg.m_nancestors = 0; /* message was full; start a new batch */
+ }
+ }
+
+ if (msg.m_nancestors > 0) /* flush the final, partially filled batch */
+ pgstat_send(&msg, offsetof(PgStat_MsgAnlAncestors, m_ancestors[0]) +
+ msg.m_nancestors * sizeof(Oid));
+
+ list_free(ancestors);
}
/* --------
char relkind = rel->rd_rel->relkind;
/* We only count stats for things that have storage */
- if (!RELKIND_HAS_STORAGE(relkind))
+ if (!RELKIND_HAS_STORAGE(relkind) &&
+ relkind != RELKIND_PARTITIONED_TABLE)
{
rel->pgstat_info = NULL;
return;
pgstat_recv_analyze(&msg.msg_analyze, len);
break;
+ case PGSTAT_MTYPE_ANL_ANCESTORS:
+ pgstat_recv_anl_ancestors(&msg.msg_anl_ancestors, len);
+ break;
+
case PGSTAT_MTYPE_ARCHIVER:
pgstat_recv_archiver(&msg.msg_archiver, len);
break;
result->n_live_tuples = 0;
result->n_dead_tuples = 0;
result->changes_since_analyze = 0;
+ result->changes_since_analyze_reported = 0;
result->inserts_since_vacuum = 0;
result->blocks_fetched = 0;
result->blocks_hit = 0;
tabentry->n_live_tuples = tabmsg->t_counts.t_delta_live_tuples;
tabentry->n_dead_tuples = tabmsg->t_counts.t_delta_dead_tuples;
tabentry->changes_since_analyze = tabmsg->t_counts.t_changed_tuples;
+ tabentry->changes_since_analyze_reported = 0;
tabentry->inserts_since_vacuum = tabmsg->t_counts.t_tuples_inserted;
tabentry->blocks_fetched = tabmsg->t_counts.t_blocks_fetched;
tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;
* have no good way to estimate how many of those there were.
*/
if (msg->m_resetcounter)
+ {
tabentry->changes_since_analyze = 0;
+ tabentry->changes_since_analyze_reported = 0;
+ }
if (msg->m_autovacuum)
{
}
}
+static void
+pgstat_recv_anl_ancestors(PgStat_MsgAnlAncestors *msg, int len)
+{
+ PgStat_StatDBEntry *dbentry;
+ PgStat_StatTabEntry *tabentry;
+ /*
+ * Process a PGSTAT_MTYPE_ANL_ANCESTORS message: add the changes that the
+ * partition msg->m_tableoid has accumulated since its last such report
+ * to the changes_since_analyze counter of each ancestor listed in the
+ * message. The len argument is not used here.
+ */
+ dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+ tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
+
+ for (int i = 0; i < msg->m_nancestors; i++)
+ {
+ Oid ancestor_relid = msg->m_ancestors[i];
+ PgStat_StatTabEntry *ancestor;
+
+ ancestor = pgstat_get_tab_entry(dbentry, ancestor_relid, true);
+ ancestor->changes_since_analyze +=
+ tabentry->changes_since_analyze - tabentry->changes_since_analyze_reported;
+ }
+ /*
+ * Remember how much has been propagated, so that the same changes are
+ * not added to the ancestors again by a later message.
+ *
+ * NOTE(review): if the sender had to split one partition's ancestor list
+ * across several messages, the ancestors in the second and later
+ * messages would receive a delta of zero, because this assignment has
+ * already run for the first one. Confirm that such deep hierarchies
+ * cannot occur in practice.
+ */
+ tabentry->changes_since_analyze_reported = tabentry->changes_since_analyze;
+
+}
/* ----------
* pgstat_recv_archiver() -
PGSTAT_MTYPE_AUTOVAC_START,
PGSTAT_MTYPE_VACUUM,
PGSTAT_MTYPE_ANALYZE,
+ PGSTAT_MTYPE_ANL_ANCESTORS,
PGSTAT_MTYPE_ARCHIVER,
PGSTAT_MTYPE_BGWRITER,
PGSTAT_MTYPE_WAL,
*
* tuples_inserted/updated/deleted/hot_updated count attempted actions,
* regardless of whether the transaction committed. delta_live_tuples,
- * delta_dead_tuples, and changed_tuples are set depending on commit or abort.
+ * delta_dead_tuples, changed_tuples are set depending on commit or abort.
* Note that delta_live_tuples and delta_dead_tuples can be negative!
* ----------
*/
PgStat_Counter m_dead_tuples;
} PgStat_MsgAnalyze;
+/* ----------
+ * PgStat_MsgAnlAncestors Sent by the backend or autovacuum daemon
+ * to tell the collector which partitioned
+ * tables are ancestors of a partition, so
+ * that the partition's analyze counters can
+ * be propagated to them
+ *
+ * PGSTAT_NUM_ANCESTORENTRIES is the number of ancestor OIDs that fit into
+ * one message: the payload space left over after m_databaseid, m_tableoid
+ * and m_nancestors, divided by sizeof(Oid).
+ * ----------
+ */
+#define PGSTAT_NUM_ANCESTORENTRIES \
+ ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(Oid) - sizeof(int)) \
+ / sizeof(Oid))
+
+typedef struct PgStat_MsgAnlAncestors
+{
+ PgStat_MsgHdr m_hdr;
+ Oid m_databaseid; /* database whose table entries are updated */
+ Oid m_tableoid; /* partition whose changes are propagated */
+ int m_nancestors; /* number of valid entries in m_ancestors[] */
+ Oid m_ancestors[PGSTAT_NUM_ANCESTORENTRIES]; /* ancestor table OIDs */
+} PgStat_MsgAnlAncestors;
/* ----------
* PgStat_MsgArchiver Sent by the archiver to update statistics.
PgStat_MsgAutovacStart msg_autovacuum_start;
PgStat_MsgVacuum msg_vacuum;
PgStat_MsgAnalyze msg_analyze;
+ PgStat_MsgAnlAncestors msg_anl_ancestors;
PgStat_MsgArchiver msg_archiver;
PgStat_MsgBgWriter msg_bgwriter;
PgStat_MsgWal msg_wal;
PgStat_Counter n_live_tuples;
PgStat_Counter n_dead_tuples;
PgStat_Counter changes_since_analyze;
+ PgStat_Counter changes_since_analyze_reported;
PgStat_Counter inserts_since_vacuum;
PgStat_Counter blocks_fetched;
extern void pgstat_report_analyze(Relation rel,
PgStat_Counter livetuples, PgStat_Counter deadtuples,
bool resetcounter);
+extern void pgstat_report_anl_ancestors(Oid relid);
extern void pgstat_report_recovery_conflict(int reason);
extern void pgstat_report_deadlock(void);
FROM ((pg_class c
LEFT JOIN pg_index i ON ((c.oid = i.indrelid)))
LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
- WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char", 'm'::"char"]))
+ WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char", 'm'::"char", 'p'::"char"]))
GROUP BY c.oid, n.nspname, c.relname;
pg_stat_archiver| SELECT s.archived_count,
s.last_archived_wal,
FROM ((pg_class c
LEFT JOIN pg_index i ON ((c.oid = i.indrelid)))
LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
- WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char", 'm'::"char"]))
+ WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char", 'm'::"char", 'p'::"char"]))
GROUP BY c.oid, n.nspname, c.relname;
pg_stat_xact_sys_tables| SELECT pg_stat_xact_all_tables.relid,
pg_stat_xact_all_tables.schemaname,