diff options
| author | Alvaro Herrera | 2017-03-24 17:06:10 +0000 |
|---|---|---|
| committer | Alvaro Herrera | 2017-03-24 17:06:10 +0000 |
| commit | 7b504eb282ca2f5104b5c00b4f05a3ef6bb1385b (patch) | |
| tree | 4b12f53c5bd25a03f1016f1daa0809606b47df3a /src/bin | |
| parent | f120b614e070aed39586d1443193738a149a90d4 (diff) | |
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
Diffstat (limited to 'src/bin')
| -rw-r--r-- | src/bin/pg_dump/common.c | 4 | ||||
| -rw-r--r-- | src/bin/pg_dump/pg_backup_archiver.c | 3 | ||||
| -rw-r--r-- | src/bin/pg_dump/pg_dump.c | 153 | ||||
| -rw-r--r-- | src/bin/pg_dump/pg_dump.h | 9 | ||||
| -rw-r--r-- | src/bin/pg_dump/pg_dump_sort.c | 12 | ||||
| -rw-r--r-- | src/bin/psql/describe.c | 51 |
6 files changed, 228 insertions, 4 deletions
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index 89530a9f0fb..e2bc3576dc3 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -273,6 +273,10 @@ getSchemaData(Archive *fout, int *numTablesPtr) getIndexes(fout, tblinfo, numTables); if (g_verbose) + write_msg(NULL, "reading extended statistics\n"); + getExtendedStatistics(fout, tblinfo, numTables); + + if (g_verbose) write_msg(NULL, "reading constraints\n"); getConstraints(fout, tblinfo, numTables); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index dd0892539a3..f77581d6ec9 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -3540,7 +3540,8 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass) strcmp(te->desc, "TRIGGER") == 0 || strcmp(te->desc, "ROW SECURITY") == 0 || strcmp(te->desc, "POLICY") == 0 || - strcmp(te->desc, "USER MAPPING") == 0) + strcmp(te->desc, "USER MAPPING") == 0 || + strcmp(te->desc, "STATISTICS") == 0) { /* these object types don't have separate owners */ } diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index b3d95d7f6ee..ba34cc163e9 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -192,6 +192,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo); static void dumpSequence(Archive *fout, TableInfo *tbinfo); static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, IndxInfo *indxinfo); +static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo); static void dumpTSParser(Archive *fout, TSParserInfo *prsinfo); @@ -6583,6 +6584,99 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) } /* + * getExtendedStatistics + * get information about extended statistics on a dumpable table + * or materialized view. + * + * Note: extended statistics data is not returned directly to the caller, but + * it does get entered into the DumpableObject tables. + */ +void +getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables) +{ + int i, + j; + PQExpBuffer query; + PGresult *res; + StatsExtInfo *statsextinfo; + int ntups; + int i_tableoid; + int i_oid; + int i_staname; + int i_stadef; + + /* Extended statistics were new in v10 */ + if (fout->remoteVersion < 100000) + return; + + query = createPQExpBuffer(); + + for (i = 0; i < numTables; i++) + { + TableInfo *tbinfo = &tblinfo[i]; + + /* Only plain tables and materialized views can have extended statistics. */ + if (tbinfo->relkind != RELKIND_RELATION && + tbinfo->relkind != RELKIND_MATVIEW) + continue; + + /* + * Ignore extended statistics of tables whose definitions are not to + * be dumped. + */ + if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)) + continue; + + if (g_verbose) + write_msg(NULL, "reading extended statistics for table \"%s.%s\"\n", + tbinfo->dobj.namespace->dobj.name, + tbinfo->dobj.name); + + /* Make sure we are in proper schema so stadef is right */ + selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name); + + resetPQExpBuffer(query); + + appendPQExpBuffer(query, + "SELECT " + "tableoid, " + "oid, " + "staname, " + "pg_catalog.pg_get_statisticsextdef(oid) AS stadef " + "FROM pg_statistic_ext " + "WHERE starelid = '%u' " + "ORDER BY staname", tbinfo->dobj.catId.oid); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + ntups = PQntuples(res); + + i_tableoid = PQfnumber(res, "tableoid"); + i_oid = PQfnumber(res, "oid"); + i_staname = PQfnumber(res, "staname"); + i_stadef = PQfnumber(res, "stadef"); + + statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo)); + + for (j = 0; j < ntups; j++) + { + statsextinfo[j].dobj.objType = DO_STATSEXT; + statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid)); + statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid)); + AssignDumpId(&statsextinfo[j].dobj); + statsextinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_staname)); + statsextinfo[j].dobj.namespace = tbinfo->dobj.namespace; + statsextinfo[j].statsexttable = tbinfo; + statsextinfo[j].statsextdef = pg_strdup(PQgetvalue(res, j, i_stadef)); + } + + PQclear(res); + } + + destroyPQExpBuffer(query); +} + +/* * getConstraints * * Get info about constraints on dumpable tables. @@ -9234,6 +9328,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) case DO_INDEX: dumpIndex(fout, (IndxInfo *) dobj); break; + case DO_STATSEXT: + dumpStatisticsExt(fout, (StatsExtInfo *) dobj); + break; case DO_REFRESH_MATVIEW: refreshMatViewData(fout, (TableDataInfo *) dobj); break; @@ -15729,6 +15826,61 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo) } /* + * dumpStatisticsExt + * write out to fout an extended statistics object + */ +static void +dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo) +{ + DumpOptions *dopt = fout->dopt; + TableInfo *tbinfo = statsextinfo->statsexttable; + PQExpBuffer q; + PQExpBuffer delq; + PQExpBuffer labelq; + + if (dopt->dataOnly) + return; + + q = createPQExpBuffer(); + delq = createPQExpBuffer(); + labelq = createPQExpBuffer(); + + appendPQExpBuffer(labelq, "STATISTICS %s", + fmtId(statsextinfo->dobj.name)); + + appendPQExpBuffer(q, "%s;\n", statsextinfo->statsextdef); + + appendPQExpBuffer(delq, "DROP STATISTICS %s.", + fmtId(tbinfo->dobj.namespace->dobj.name)); + appendPQExpBuffer(delq, "%s;\n", + fmtId(statsextinfo->dobj.name)); + + if (statsextinfo->dobj.dump & DUMP_COMPONENT_DEFINITION) + ArchiveEntry(fout, statsextinfo->dobj.catId, + statsextinfo->dobj.dumpId, + statsextinfo->dobj.name, + tbinfo->dobj.namespace->dobj.name, + NULL, + tbinfo->rolname, false, + "STATISTICS", SECTION_POST_DATA, + q->data, delq->data, NULL, + NULL, 0, + NULL, NULL); + + /* Dump Statistics Comments */ + if (statsextinfo->dobj.dump & DUMP_COMPONENT_COMMENT) + dumpComment(fout, labelq->data, + tbinfo->dobj.namespace->dobj.name, + tbinfo->rolname, + statsextinfo->dobj.catId, 0, + statsextinfo->dobj.dumpId); + + destroyPQExpBuffer(q); + destroyPQExpBuffer(delq); + destroyPQExpBuffer(labelq); +} + +/* * dumpConstraint * write out to fout a user-defined constraint */ @@ -17266,6 +17418,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, addObjectDependency(postDataBound, dobj->dumpId); break; case DO_INDEX: + case DO_STATSEXT: case DO_REFRESH_MATVIEW: case DO_TRIGGER: case DO_EVENT_TRIGGER: diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index a466527ec68..cb22f63bd6a 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -56,6 +56,7 @@ typedef enum DO_TABLE, DO_ATTRDEF, DO_INDEX, + DO_STATSEXT, DO_RULE, DO_TRIGGER, DO_CONSTRAINT, @@ -362,6 +363,13 @@ typedef struct _indxInfo int relpages; /* relpages of the underlying table */ } IndxInfo; +typedef struct _statsExtInfo +{ + DumpableObject dobj; + TableInfo *statsexttable; /* link to table the stats ext is for */ + char *statsextdef; +} StatsExtInfo; + typedef struct _ruleInfo { DumpableObject dobj; @@ -682,6 +690,7 @@ extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables); extern InhInfo *getInherits(Archive *fout, int *numInherits); extern PartInfo *getPartitions(Archive *fout, int *numPartitions); extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables); +extern void getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables); extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables); extern RuleInfo *getRules(Archive *fout, int *numRules); extern void getTriggers(Archive *fout, TableInfo tblinfo[], int numTables); diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index e555de88572..5c19b05ca48 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -53,10 +53,11 @@ static const int dbObjectTypePriority[] = 18, /* DO_TABLE */ 20, /* DO_ATTRDEF */ 28, /* DO_INDEX */ - 29, /* DO_RULE */ - 30, /* DO_TRIGGER */ + 29, /* DO_STATSEXT */ + 30, /* DO_RULE */ + 31, /* DO_TRIGGER */ 27, /* DO_CONSTRAINT */ - 31, /* DO_FK_CONSTRAINT */ + 32, /* DO_FK_CONSTRAINT */ 2, /* DO_PROCLANG */ 10, /* DO_CAST */ 23, /* DO_TABLE_DATA */ @@ -1291,6 +1292,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize) "INDEX %s (ID %d OID %u)", obj->name, obj->dumpId, obj->catId.oid); return; + case DO_STATSEXT: + snprintf(buf, bufsize, + "STATISTICS %s (ID %d OID %u)", + obj->name, obj->dumpId, obj->catId.oid); + return; case DO_REFRESH_MATVIEW: snprintf(buf, bufsize, "REFRESH MATERIALIZED VIEW %s (ID %d OID %u)", diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 8c583127fdd..3cf1742020f 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2320,6 +2320,57 @@ describeOneTableDetails(const char *schemaname, PQclear(result); } + /* print any extended statistics */ + if (pset.sversion >= 100000) + { + printfPQExpBuffer(&buf, + "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n" + " (SELECT pg_catalog.string_agg(pg_catalog.quote_ident(attname::text),', ') \n" + " FROM ((SELECT pg_catalog.unnest(stakeys) AS attnum) s\n" + " JOIN pg_catalog.pg_attribute a ON (starelid = a.attrelid AND\n" + "a.attnum = s.attnum AND not attisdropped))) AS columns,\n" + " (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled\n" + "FROM pg_catalog.pg_statistic_ext stat WHERE starelid = '%s'\n" + "ORDER BY 1;", + oid); + + result = PSQLexec(buf.data); + if (!result) + goto error_return; + else + tuples = PQntuples(result); + + if (tuples > 0) + { + printTableAddFooter(&cont, _("Statistics:")); + + for (i = 0; i < tuples; i++) + { + int cnt = 0; + + printfPQExpBuffer(&buf, " "); + + /* statistics name (qualified with namespace) */ + appendPQExpBuffer(&buf, "\"%s.%s\" WITH (", + PQgetvalue(result, i, 1), + PQgetvalue(result, i, 2)); + + /* options */ + if (strcmp(PQgetvalue(result, i, 5), "t") == 0) + { + appendPQExpBufferStr(&buf, "ndistinct"); + cnt++; + } + + appendPQExpBuffer(&buf, ") ON (%s)", + PQgetvalue(result, i, 4)); + + printTableAddFooter(&cont, buf.data); + } + } + PQclear(result); + } + /* print rules */ if (tableinfo.hasrules && tableinfo.relkind != RELKIND_MATVIEW) { |
