summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorTom Lane2019-02-09 23:32:23 +0000
committerTom Lane2019-02-09 23:32:23 +0000
commita391ff3c3d418e404a2c6e4ff0865a107752827b (patch)
treef076c5785d06e7ccf082b08c1651b17d95af4563 /src/include
parent1fb57af92069ee104c09e2016af9e0e620681be3 (diff)
Build out the planner support function infrastructure.
Add support function requests for estimating the selectivity, cost, and number of result rows (if a SRF) of the target function. The lack of a way to estimate selectivity of a boolean-returning function in WHERE has been a recognized deficiency of the planner since Berkeley days. This commit finally fixes it. In addition, non-constant estimates of cost and number of output rows are now possible. We still fall back to looking at procost and prorows if the support function doesn't service the request, of course. To make concrete use of the possibility of estimating output rowcount for SRFs, this commit adds support functions for array_unnest(anyarray) and the integer variants of generate_series; the lack of plausible rowcount estimates for those, even when it's obvious to a human, has been a repeated subject of complaints. Obviously, much more could now be done in this line, but I'm mostly just trying to get the infrastructure in place. Discussion: https://postgr.es/m/15193.1548028093@sss.pgh.pa.us
Diffstat (limited to 'src/include')
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/catalog/pg_proc.dat25
-rw-r--r--src/include/nodes/nodes.h5
-rw-r--r--src/include/nodes/pathnodes.h2
-rw-r--r--src/include/nodes/supportnodes.h100
-rw-r--r--src/include/optimizer/clauses.h2
-rw-r--r--src/include/optimizer/plancat.h14
-rw-r--r--src/include/utils/lsyscache.h3
8 files changed, 141 insertions, 12 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 9233fb934e8..968a6800ddc 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201902091
+#define CATALOG_VERSION_NO 201902092
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 1f5352ce543..50b742c06e6 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1530,9 +1530,12 @@
proargtypes => 'anyelement _int4 _int4',
prosrc => 'array_fill_with_lower_bounds' },
{ oid => '2331', descr => 'expand array to set of rows',
- proname => 'unnest', prorows => '100', proretset => 't',
- prorettype => 'anyelement', proargtypes => 'anyarray',
+ proname => 'unnest', prorows => '100', prosupport => 'array_unnest_support',
+ proretset => 't', prorettype => 'anyelement', proargtypes => 'anyarray',
prosrc => 'array_unnest' },
+{ oid => '3996', descr => 'planner support for array_unnest',
+ proname => 'array_unnest_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'array_unnest_support' },
{ oid => '3167',
descr => 'remove any occurrences of an element from an array',
proname => 'array_remove', proisstrict => 'f', prorettype => 'anyarray',
@@ -7536,21 +7539,31 @@
# non-persistent series generator
{ oid => '1066', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_int4_support', proretset => 't',
prorettype => 'int4', proargtypes => 'int4 int4 int4',
prosrc => 'generate_series_step_int4' },
{ oid => '1067', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_int4_support', proretset => 't',
prorettype => 'int4', proargtypes => 'int4 int4',
prosrc => 'generate_series_int4' },
+{ oid => '3994', descr => 'planner support for generate_series',
+ proname => 'generate_series_int4_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'generate_series_int4_support' },
{ oid => '1068', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_int8_support', proretset => 't',
prorettype => 'int8', proargtypes => 'int8 int8 int8',
prosrc => 'generate_series_step_int8' },
{ oid => '1069', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_int8_support', proretset => 't',
prorettype => 'int8', proargtypes => 'int8 int8',
prosrc => 'generate_series_int8' },
+{ oid => '3995', descr => 'planner support for generate_series',
+ proname => 'generate_series_int8_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'generate_series_int8_support' },
{ oid => '3259', descr => 'non-persistent series generator',
proname => 'generate_series', prorows => '1000', proretset => 't',
prorettype => 'numeric', proargtypes => 'numeric numeric numeric',
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 0d2d1889e9d..453079a9e26 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -507,7 +507,10 @@ typedef enum NodeTag
T_TsmRoutine, /* in access/tsmapi.h */
T_ForeignKeyCacheInfo, /* in utils/rel.h */
T_CallContext, /* in nodes/parsenodes.h */
- T_SupportRequestSimplify /* in nodes/supportnodes.h */
+ T_SupportRequestSimplify, /* in nodes/supportnodes.h */
+ T_SupportRequestSelectivity, /* in nodes/supportnodes.h */
+ T_SupportRequestCost, /* in nodes/supportnodes.h */
+ T_SupportRequestRows /* in nodes/supportnodes.h */
} NodeTag;
/*
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 0b780b07c12..c23c4304f37 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -61,7 +61,7 @@ typedef struct AggClauseCosts
bool hasNonPartial; /* does any agg not support partial mode? */
bool hasNonSerial; /* is any partial agg non-serializable? */
QualCost transCost; /* total per-input-row execution costs */
- Cost finalCost; /* total per-aggregated-row costs */
+ QualCost finalCost; /* total per-aggregated-row costs */
Size transitionSpace; /* space for pass-by-ref transition data */
} AggClauseCosts;
diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h
index 1f7d02b5ee2..1a3a36ba99c 100644
--- a/src/include/nodes/supportnodes.h
+++ b/src/include/nodes/supportnodes.h
@@ -36,6 +36,7 @@
#include "nodes/primnodes.h"
struct PlannerInfo; /* avoid including relation.h here */
+struct SpecialJoinInfo;
/*
@@ -67,4 +68,103 @@ typedef struct SupportRequestSimplify
FuncExpr *fcall; /* Function call to be simplified */
} SupportRequestSimplify;
+/*
+ * The Selectivity request allows the support function to provide a
+ * selectivity estimate for a function appearing at top level of a WHERE
+ * clause (so it applies only to functions returning boolean).
+ *
+ * The input arguments are the same as are supplied to operator restriction
+ * and join estimators, except that we unify those two APIs into just one
+ * request type. See clause_selectivity() for the details.
+ *
+ * If an estimate can be made, store it into the "selectivity" field and
+ * return the address of the SupportRequestSelectivity node; the estimate
+ * must be between 0 and 1 inclusive. Return NULL if no estimate can be
+ * made (in which case the planner will fall back to a default estimate,
+ * traditionally 1/3).
+ *
+ * If the target function is being used as the implementation of an operator,
+ * the support function will not be used for this purpose; the operator's
+ * restriction or join estimator is consulted instead.
+ */
+typedef struct SupportRequestSelectivity
+{
+ NodeTag type;
+
+ /* Input fields: */
+ struct PlannerInfo *root; /* Planner's infrastructure */
+ Oid funcid; /* function we are inquiring about */
+ List *args; /* pre-simplified arguments to function */
+ Oid inputcollid; /* function's input collation */
+ bool is_join; /* is this a join or restriction case? */
+ int varRelid; /* if restriction, RTI of target relation */
+ JoinType jointype; /* if join, outer join type */
+ struct SpecialJoinInfo *sjinfo; /* if outer join, info about join */
+
+ /* Output fields: */
+ Selectivity selectivity; /* returned selectivity estimate */
+} SupportRequestSelectivity;
+
+/*
+ * The Cost request allows the support function to provide an execution
+ * cost estimate for its target function. The cost estimate can include
+ * both a one-time (query startup) component and a per-execution component.
+ * The estimate should *not* include the costs of evaluating the target
+ * function's arguments, only the target function itself.
+ *
+ * The "node" argument is normally the parse node that is invoking the
+ * target function. This is a FuncExpr in the simplest case, but it could
+ * also be an OpExpr, DistinctExpr, NullIfExpr, or WindowFunc, or possibly
+ * other cases in future. NULL is passed if the function cannot presume
+ * its arguments to be equivalent to what the calling node presents as
+ * arguments; that happens for, e.g., aggregate support functions and
+ * per-column comparison operators used by RowExprs.
+ *
+ * If an estimate can be made, store it into the cost fields and return the
+ * address of the SupportRequestCost node. Return NULL if no estimate can be
+ * made, in which case the planner will rely on the target function's procost
+ * field. (Note: while procost is automatically scaled by cpu_operator_cost,
+ * this is not the case for the outputs of the Cost request; the support
+ * function must scale its results appropriately on its own.)
+ */
+typedef struct SupportRequestCost
+{
+ NodeTag type;
+
+ /* Input fields: */
+ struct PlannerInfo *root; /* Planner's infrastructure (could be NULL) */
+ Oid funcid; /* function we are inquiring about */
+ Node *node; /* parse node invoking function, or NULL */
+
+ /* Output fields: */
+ Cost startup; /* one-time cost */
+ Cost per_tuple; /* per-evaluation cost */
+} SupportRequestCost;
+
+/*
+ * The Rows request allows the support function to provide an output rowcount
+ * estimate for its target function (so it applies only to set-returning
+ * functions).
+ *
+ * The "node" argument is the parse node that is invoking the target function;
+ * currently this will always be a FuncExpr or OpExpr.
+ *
+ * If an estimate can be made, store it into the rows field and return the
+ * address of the SupportRequestRows node. Return NULL if no estimate can be
+ * made, in which case the planner will rely on the target function's prorows
+ * field.
+ */
+typedef struct SupportRequestRows
+{
+ NodeTag type;
+
+ /* Input fields: */
+ struct PlannerInfo *root; /* Planner's infrastructure (could be NULL) */
+ Oid funcid; /* function we are inquiring about */
+ Node *node; /* parse node invoking function */
+
+ /* Output fields: */
+ double rows; /* number of rows expected to be returned */
+} SupportRequestRows;
+
#endif /* SUPPORTNODES_H */
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index 95a78cfa393..5e10fb1d507 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -31,7 +31,7 @@ extern void get_agg_clause_costs(PlannerInfo *root, Node *clause,
extern bool contain_window_function(Node *clause);
extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);
-extern double expression_returns_set_rows(Node *clause);
+extern double expression_returns_set_rows(PlannerInfo *root, Node *clause);
extern bool contain_subplans(Node *clause);
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index 40f70f9f2b1..c337f047cb7 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -55,6 +55,20 @@ extern Selectivity join_selectivity(PlannerInfo *root,
JoinType jointype,
SpecialJoinInfo *sjinfo);
+extern Selectivity function_selectivity(PlannerInfo *root,
+ Oid funcid,
+ List *args,
+ Oid inputcollid,
+ bool is_join,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo);
+
+extern void add_function_cost(PlannerInfo *root, Oid funcid, Node *node,
+ QualCost *cost);
+
+extern double get_function_rows(PlannerInfo *root, Oid funcid, Node *node);
+
extern bool has_row_triggers(PlannerInfo *root, Index rti, CmdType event);
#endif /* PLANCAT_H */
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index ceec85db925..16b0b1d2dcc 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -120,8 +120,7 @@ extern char func_volatile(Oid funcid);
extern char func_parallel(Oid funcid);
extern char get_func_prokind(Oid funcid);
extern bool get_func_leakproof(Oid funcid);
-extern float4 get_func_cost(Oid funcid);
-extern float4 get_func_rows(Oid funcid);
+extern RegProcedure get_func_support(Oid funcid);
extern Oid get_relname_relid(const char *relname, Oid relnamespace);
extern char *get_rel_name(Oid relid);
extern Oid get_rel_namespace(Oid relid);