summaryrefslogtreecommitdiff
path: root/src/backend/optimizer
diff options
context:
space:
mode:
authorAlexander Korotkov2025-04-04 13:01:50 +0000
committerAlexander Korotkov2025-04-04 13:01:50 +0000
commitc0962a113d1f2f94cb7222a7ca025a67e9ce3860 (patch)
tree6da1bad29d2394bca13b4c29c4df5ae9a4ceee67 /src/backend/optimizer
parentd48d2e2dc8be50d3ca13305b5699384329b15433 (diff)
Convert 'x IN (VALUES ...)' to 'x = ANY ...' then appropriate
This commit implements the automatic conversion of 'x IN (VALUES ...)' into ScalarArrayOpExpr. That simplifies the query tree, eliminating the appearance of an unnecessary join. Since VALUES describes a relational table, and the value of such a list is a table row, the optimizer will likely face an underestimation problem due to the inability to estimate cardinality through MCV statistics. The cardinality evaluation mechanism can work with the array inclusion check operation. If the array is small enough (< 100 elements), it will perform a statistical evaluation element by element. We perform the transformation in the convert_ANY_sublink_to_join() if VALUES RTE is proper and the transformation is convertible. The conversion is only possible for operations on scalar values, not rows. Also, we currently support the transformation only when it ends up with a constant array. Otherwise, the evaluation of non-hashed SAOP might be slower than the corresponding Hash Join with VALUES. Discussion: https://postgr.es/m/0184212d-1248-4f1f-a42d-f5cb1c1976d2%40tantorlabs.com Author: Alena Rybakina <a.rybakina@postgrespro.ru> Author: Andrei Lepikhov <lepihov@gmail.com> Reviewed-by: Ivan Kush <ivan.kush@tantorlabs.com> Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r--src/backend/optimizer/plan/subselect.c80
-rw-r--r--src/backend/optimizer/prep/prepjointree.c12
-rw-r--r--src/backend/optimizer/util/clauses.c14
3 files changed, 101 insertions, 5 deletions
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 8230cbea3c3..e7cb3fede66 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1214,6 +1214,86 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context)
return expression_tree_walker(node, inline_cte_walker, context);
}
+/*
+ * Attempt to transform 'testexpr' over the VALUES subquery into
+ * a ScalarArrayOpExpr. We currently support the transformation only when
+ * it ends up with a constant array. Otherwise, the evaluation of non-hashed
+ * SAOP might be slower than the corresponding Hash Join with VALUES.
+ *
+ * Return transformed ScalarArrayOpExpr or NULL if transformation isn't
+ * allowed.
+ */
+ScalarArrayOpExpr *
+convert_VALUES_to_ANY(PlannerInfo *root, Node *testexpr, Query *values)
+{
+ RangeTblEntry *rte;
+ Node *leftop;
+ Node *rightop;
+ Oid opno;
+ ListCell *lc;
+ Oid inputcollid;
+ List *exprs = NIL;
+
+ /*
+ * Check we have a binary operator over a single-column subquery with no
+ * joins and no LIMIT/OFFSET/ORDER BY clauses.
+ */
+ if (!IsA(testexpr, OpExpr) ||
+ list_length(((OpExpr *) testexpr)->args) != 2 ||
+ list_length(values->targetList) > 1 ||
+ values->limitCount != NULL ||
+ values->limitOffset != NULL ||
+ values->sortClause != NIL ||
+ list_length(values->rtable) != 1)
+ return NULL;
+
+ rte = linitial_node(RangeTblEntry, values->rtable);
+ leftop = linitial(((OpExpr *) testexpr)->args);
+ rightop = lsecond(((OpExpr *) testexpr)->args);
+ opno = ((OpExpr *) testexpr)->opno;
+ inputcollid = ((OpExpr *) testexpr)->inputcollid;
+
+ /*
+ * Also, check that only RTE corresponds to VALUES; the list of values has
+ * at least two items and no volatile functions.
+ */
+ if (rte->rtekind != RTE_VALUES ||
+ list_length(rte->values_lists) < 2 ||
+ contain_volatile_functions((Node *) rte->values_lists))
+ return NULL;
+
+ foreach(lc, rte->values_lists)
+ {
+ List *elem = lfirst(lc);
+ Node *value = linitial(elem);
+
+ /*
+ * Prepare an evaluation of the right side of the operator with
+ * substitution of the given value.
+ */
+ value = convert_testexpr(root, rightop, list_make1(value));
+
+ /*
+ * Try to evaluate constant expressions. We could get Const as a
+ * result.
+ */
+ value = eval_const_expressions(root, value);
+
+ /*
+ * As we only support constant output arrays, all the items must also
+ * be constant.
+ */
+ if (!IsA(value, Const))
+ return NULL;
+
+ exprs = lappend(exprs, value);
+ }
+
+ /* Finally, build ScalarArrayOpExpr at the top of the 'exprs' list. */
+ return make_SAOP_expr(opno, leftop, exprType(rightop),
+ linitial_oid(rte->colcollations), inputcollid,
+ exprs, false);
+}
/*
* convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index d131a5bbc59..87dc6f56b57 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -664,6 +664,18 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Is it a convertible ANY or EXISTS clause? */
if (sublink->subLinkType == ANY_SUBLINK)
{
+ ScalarArrayOpExpr *saop;
+
+ if ((saop = convert_VALUES_to_ANY(root,
+ sublink->testexpr,
+ (Query *) sublink->subselect)) != NULL)
+
+ /*
+ * The VALUES sequence was simplified. Nothing more to do
+ * here.
+ */
+ return (Node *) saop;
+
if ((j = convert_ANY_sublink_to_join(root, sublink,
available_rels1)) != NULL)
{
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 9965df1b965..26a3e050086 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -5484,26 +5484,30 @@ make_SAOP_expr(Oid oper, Node *leftexpr, Oid coltype, Oid arraycollid,
bool typbyval;
char typalign;
Datum *elems;
+ bool *nulls;
int i = 0;
ArrayType *arrayConst;
+ int dims[1] = {list_length(exprs)};
+ int lbs[1] = {1};
get_typlenbyvalalign(coltype, &typlen, &typbyval, &typalign);
elems = (Datum *) palloc(sizeof(Datum) * list_length(exprs));
+ nulls = (bool *) palloc(sizeof(bool) * list_length(exprs));
foreach_node(Const, value, exprs)
{
- Assert(!value->constisnull);
-
- elems[i++] = value->constvalue;
+ elems[i] = value->constvalue;
+ nulls[i++] = value->constisnull;
}
- arrayConst = construct_array(elems, i, coltype,
- typlen, typbyval, typalign);
+ arrayConst = construct_md_array(elems, nulls, 1, dims, lbs,
+ coltype, typlen, typbyval, typalign);
arrayNode = (Node *) makeConst(arraytype, -1, arraycollid,
-1, PointerGetDatum(arrayConst),
false, false);
pfree(elems);
+ pfree(nulls);
list_free(exprs);
}