From 075df6b2080b13e0a5adc88737b7c24417a873c1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 20 Jan 2024 13:57:54 -0500 Subject: [PATCH] Add planner support functions for range operators <@ and @>. These support functions will transform expressions with constant range values into direct comparisons on the range bound values, which are frequently better-optimizable. The transformation is skipped however if it would require double evaluation of a volatile or expensive element expression. Along the way, add the range opfamily OID to range typcache entries, since load_rangetype_info has to compute that anyway and it seems silly to duplicate the work later. Kim Johan Andersson and Jian He, reviewed by Laurenz Albe Discussion: https://postgr.es/m/94f64d1f-b8c0-b0c5-98bc-0793a34e0851@kimmet.dk --- src/backend/utils/adt/rangetypes.c | 244 +++++++++++++++++++- src/backend/utils/adt/rangetypes_selfuncs.c | 7 +- src/backend/utils/cache/typcache.c | 1 + src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 14 +- src/include/utils/typcache.h | 1 + src/test/regress/expected/rangetypes.out | 142 ++++++++++++ src/test/regress/sql/rangetypes.sql | 69 ++++++ 8 files changed, 469 insertions(+), 11 deletions(-) diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c index d99b00b590..2d94a6b877 100644 --- a/src/backend/utils/adt/rangetypes.c +++ b/src/backend/utils/adt/rangetypes.c @@ -30,19 +30,20 @@ */ #include "postgres.h" -#include "access/tupmacs.h" #include "common/hashfn.h" -#include "lib/stringinfo.h" #include "libpq/pqformat.h" #include "miscadmin.h" +#include "nodes/makefuncs.h" #include "nodes/miscnodes.h" -#include "port/pg_bitutils.h" +#include "nodes/supportnodes.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/optimizer.h" #include "utils/builtins.h" #include "utils/date.h" #include "utils/lsyscache.h" #include "utils/rangetypes.h" #include "utils/timestamp.h" -#include "varatt.h" /* fn_extra cache entry for one of the range I/O functions */ @@ -69,6 +70,12 @@ static Size datum_compute_size(Size data_length, Datum val, bool typbyval, char typalign, int16 typlen, char typstorage); static Pointer datum_write(Pointer ptr, Datum datum, bool typbyval, char typalign, int16 typlen, char typstorage); +static Node *find_simplified_clause(PlannerInfo *root, + Expr *rangeExpr, Expr *elemExpr); +static Expr *build_bound_expr(Expr *elemExpr, Datum val, + bool isLowerBound, bool isInclusive, + TypeCacheEntry *typeCache, + Oid opfamily, Oid rng_collation); /* @@ -2173,6 +2180,58 @@ make_empty_range(TypeCacheEntry *typcache) return make_range(typcache, &lower, &upper, true, NULL); } +/* + * Planner support function for elem_contained_by_range (<@ operator). + */ +Datum +elem_contained_by_range_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + FuncExpr *fexpr = req->fcall; + Expr *leftop, + *rightop; + + Assert(list_length(fexpr->args) == 2); + leftop = linitial(fexpr->args); + rightop = lsecond(fexpr->args); + + ret = find_simplified_clause(req->root, rightop, leftop); + } + + PG_RETURN_POINTER(ret); +} + +/* + * Planner support function for range_contains_elem (@> operator). + */ +Datum +range_contains_elem_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestSimplify)) + { + SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; + FuncExpr *fexpr = req->fcall; + Expr *leftop, + *rightop; + + Assert(list_length(fexpr->args) == 2); + leftop = linitial(fexpr->args); + rightop = lsecond(fexpr->args); + + ret = find_simplified_clause(req->root, leftop, rightop); + } + + PG_RETURN_POINTER(ret); +} + /* *---------------------------------------------------------- @@ -2715,3 +2774,180 @@ datum_write(Pointer ptr, Datum datum, bool typbyval, char typalign, return ptr; } + +/* + * Common code for the elem_contained_by_range and range_contains_elem + * support functions. The caller has extracted the function argument + * expressions, and swapped them if necessary to pass the range first. + * + * Returns a simplified replacement expression, or NULL if we can't simplify. + */ +static Node * +find_simplified_clause(PlannerInfo *root, Expr *rangeExpr, Expr *elemExpr) +{ + RangeType *range; + TypeCacheEntry *rangetypcache; + RangeBound lower; + RangeBound upper; + bool empty; + + /* can't do anything unless the range is a non-null constant */ + if (!IsA(rangeExpr, Const) || ((Const *) rangeExpr)->constisnull) + return NULL; + range = DatumGetRangeTypeP(((Const *) rangeExpr)->constvalue); + + rangetypcache = lookup_type_cache(RangeTypeGetOid(range), + TYPECACHE_RANGE_INFO); + if (rangetypcache->rngelemtype == NULL) + elog(ERROR, "type %u is not a range type", RangeTypeGetOid(range)); + + range_deserialize(rangetypcache, range, &lower, &upper, &empty); + + if (empty) + { + /* if the range is empty, then there can be no matches */ + return makeBoolConst(false, false); + } + else if (lower.infinite && upper.infinite) + { + /* the range has infinite bounds, so it matches everything */ + return makeBoolConst(true, false); + } + else + { + /* at least one bound is available, we have something to work with */ + TypeCacheEntry *elemTypcache = rangetypcache->rngelemtype; + Oid opfamily = rangetypcache->rng_opfamily; + Oid rng_collation = rangetypcache->rng_collation; + Expr *lowerExpr = NULL; + Expr *upperExpr = NULL; + + if (!lower.infinite && !upper.infinite) + { + /* + * When both bounds are present, we have a problem: the + * "simplified" clause would need to evaluate the elemExpr twice. + * That's definitely not okay if the elemExpr is volatile, and + * it's also unattractive if the elemExpr is expensive. + */ + QualCost eval_cost; + + if (contain_volatile_functions((Node *) elemExpr)) + return NULL; + + /* + * We define "expensive" as "contains any subplan or more than 10 + * operators". Note that the subplan search has to be done + * explicitly, since cost_qual_eval() will barf on unplanned + * subselects. + */ + if (contain_subplans((Node *) elemExpr)) + return NULL; + cost_qual_eval_node(&eval_cost, (Node *) elemExpr, root); + if (eval_cost.startup + eval_cost.per_tuple > + 10 * cpu_operator_cost) + return NULL; + } + + /* Okay, try to build boundary comparison expressions */ + if (!lower.infinite) + { + lowerExpr = build_bound_expr(elemExpr, + lower.val, + true, + lower.inclusive, + elemTypcache, + opfamily, + rng_collation); + if (lowerExpr == NULL) + return NULL; + } + + if (!upper.infinite) + { + /* Copy the elemExpr if we need two copies */ + if (!lower.infinite) + elemExpr = copyObject(elemExpr); + upperExpr = build_bound_expr(elemExpr, + upper.val, + false, + upper.inclusive, + elemTypcache, + opfamily, + rng_collation); + if (upperExpr == NULL) + return NULL; + } + + if (lowerExpr != NULL && upperExpr != NULL) + return (Node *) make_andclause(list_make2(lowerExpr, upperExpr)); + else if (lowerExpr != NULL) + return (Node *) lowerExpr; + else if (upperExpr != NULL) + return (Node *) upperExpr; + else + { + Assert(false); + return NULL; + } + } +} + +/* + * Helper function for find_simplified_clause(). + * + * Build the expression (elemExpr Operator val), where the operator is + * the appropriate member of the given opfamily depending on + * isLowerBound and isInclusive. typeCache is the typcache entry for + * the "val" value (presently, this will be the same type as elemExpr). + * rng_collation is the collation to use in the comparison. + * + * Return NULL on failure (if, for some reason, we can't find the operator). + */ +static Expr * +build_bound_expr(Expr *elemExpr, Datum val, + bool isLowerBound, bool isInclusive, + TypeCacheEntry *typeCache, + Oid opfamily, Oid rng_collation) +{ + Oid elemType = typeCache->type_id; + int16 elemTypeLen = typeCache->typlen; + bool elemByValue = typeCache->typbyval; + Oid elemCollation = typeCache->typcollation; + int16 strategy; + Oid oproid; + Expr *constExpr; + + /* Identify the comparison operator to use */ + if (isLowerBound) + strategy = isInclusive ? BTGreaterEqualStrategyNumber : BTGreaterStrategyNumber; + else + strategy = isInclusive ? BTLessEqualStrategyNumber : BTLessStrategyNumber; + + /* + * We could use exprType(elemExpr) here, if it ever becomes possible that + * elemExpr is not the exact same type as the range elements. + */ + oproid = get_opfamily_member(opfamily, elemType, elemType, strategy); + + /* We don't really expect failure here, but just in case ... */ + if (!OidIsValid(oproid)) + return NULL; + + /* OK, convert "val" to a full-fledged Const node, and make the OpExpr */ + constExpr = (Expr *) makeConst(elemType, + -1, + elemCollation, + elemTypeLen, + val, + false, + elemByValue); + + return make_opclause(oproid, + BOOLOID, + false, + elemExpr, + constExpr, + InvalidOid, + rng_collation); +} diff --git a/src/backend/utils/adt/rangetypes_selfuncs.c b/src/backend/utils/adt/rangetypes_selfuncs.c index c260012bd0..3431c3cd98 100644 --- a/src/backend/utils/adt/rangetypes_selfuncs.c +++ b/src/backend/utils/adt/rangetypes_selfuncs.c @@ -196,9 +196,10 @@ rangesel(PG_FUNCTION_ARGS) else if (operator == OID_RANGE_ELEM_CONTAINED_OP) { /* - * Here, the Var is the elem, not the range. For now we just punt and - * return the default estimate. In future we could disassemble the - * range constant and apply scalarineqsel ... + * Here, the Var is the elem, not the range. In typical cases + * elem_contained_by_range_support will have simplified this case, so + * that we won't get here. If we do get here we'll fall back on a + * default estimate. */ } else if (((Const *) other)->consttype == vardata.vartype) diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c index 4625be4d5c..84fc83cb11 100644 --- a/src/backend/utils/cache/typcache.c +++ b/src/backend/utils/cache/typcache.c @@ -940,6 +940,7 @@ load_rangetype_info(TypeCacheEntry *typentry) /* get opclass properties and look up the comparison function */ opfamilyOid = get_opclass_family(opclassOid); opcintype = get_opclass_input_type(opclassOid); + typentry->rng_opfamily = opfamilyOid; cmpFnOid = get_opfamily_proc(opfamilyOid, opcintype, opcintype, BTORDER_PROC); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 6496b3ffc9..6cbe33b6fa 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202401191 +#define CATALOG_VERSION_NO 202401201 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index a0277e57c7..ad74e07dbb 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -10503,13 +10503,15 @@ proname => 'range_overlaps', prorettype => 'bool', proargtypes => 'anyrange anyrange', prosrc => 'range_overlaps' }, { oid => '3858', - proname => 'range_contains_elem', prorettype => 'bool', - proargtypes => 'anyrange anyelement', prosrc => 'range_contains_elem' }, + proname => 'range_contains_elem', prosupport => 'range_contains_elem_support', + prorettype => 'bool', proargtypes => 'anyrange anyelement', + prosrc => 'range_contains_elem' }, { oid => '3859', proname => 'range_contains', prorettype => 'bool', proargtypes => 'anyrange anyrange', prosrc => 'range_contains' }, { oid => '3860', - proname => 'elem_contained_by_range', prorettype => 'bool', + proname => 'elem_contained_by_range', + prosupport => 'elem_contained_by_range_support', prorettype => 'bool', proargtypes => 'anyelement anyrange', prosrc => 'elem_contained_by_range' }, { oid => '3861', proname => 'range_contained_by', prorettype => 'bool', @@ -10532,6 +10534,12 @@ { oid => '3867', proname => 'range_union', prorettype => 'anyrange', proargtypes => 'anyrange anyrange', prosrc => 'range_union' }, +{ oid => '9998', descr => 'planner support for range_contains_elem', + proname => 'range_contains_elem_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'range_contains_elem_support' }, +{ oid => '9999', descr => 'planner support for elem_contained_by_range', + proname => 'elem_contained_by_range_support', prorettype => 'internal', + proargtypes => 'internal', prosrc => 'elem_contained_by_range_support' }, { oid => '4057', descr => 'the smallest range which includes both of the given ranges', proname => 'range_merge', prorettype => 'anyrange', diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h index b72daa7fb4..f506cc4aa3 100644 --- a/src/include/utils/typcache.h +++ b/src/include/utils/typcache.h @@ -96,6 +96,7 @@ typedef struct TypeCacheEntry * btree comparison function. */ struct TypeCacheEntry *rngelemtype; /* range's element type */ + Oid rng_opfamily; /* opfamily to use for range comparisons */ Oid rng_collation; /* collation for comparisons, if any */ FmgrInfo rng_cmp_proc_finfo; /* comparison function */ FmgrInfo rng_canonical_finfo; /* canonicalization function, if any */ diff --git a/src/test/regress/expected/rangetypes.out b/src/test/regress/expected/rangetypes.out index 07d5621ef8..a7cc220bf0 100644 --- a/src/test/regress/expected/rangetypes.out +++ b/src/test/regress/expected/rangetypes.out @@ -1834,3 +1834,145 @@ create function table_fail(i anyelement) returns table(i anyelement, r anyrange) as $$ select $1, '[1,10]' $$ language sql; ERROR: cannot determine result data type DETAIL: A result of type anyrange requires at least one input of type anyrange or anymultirange. +-- +-- Test support functions +-- +-- empty range +explain (verbose, costs off) +select current_date <@ daterange 'empty'; + QUERY PLAN +----------------- + Result + Output: false +(2 rows) + +-- unbounded range +explain (verbose, costs off) +select current_date <@ daterange(NULL, NULL); + QUERY PLAN +---------------- + Result + Output: true +(2 rows) + +-- only lower bound present +explain (verbose, costs off) +select current_date <@ daterange('2000-01-01', NULL, '[)'); + QUERY PLAN +------------------------------------------------ + Result + Output: (CURRENT_DATE >= '01-01-2000'::date) +(2 rows) + +-- only upper bound present +explain (verbose, costs off) +select current_date <@ daterange(NULL, '2000-01-01', '(]'); + QUERY PLAN +----------------------------------------------- + Result + Output: (CURRENT_DATE < '01-02-2000'::date) +(2 rows) + +-- lower range "-Infinity" excluded +explain (verbose, costs off) +select current_date <@ daterange('-Infinity', '1997-04-10'::date, '()'); + QUERY PLAN +---------------------------------------------------------------------------------------- + Result + Output: ((CURRENT_DATE > '-infinity'::date) AND (CURRENT_DATE < '04-10-1997'::date)) +(2 rows) + +-- lower range "-Infinity" included +explain (verbose, costs off) +select current_date <@ daterange('-Infinity', '1997-04-10'::date, '[)'); + QUERY PLAN +----------------------------------------------------------------------------------------- + Result + Output: ((CURRENT_DATE >= '-infinity'::date) AND (CURRENT_DATE < '04-10-1997'::date)) +(2 rows) + +-- upper range "Infinity" excluded +explain (verbose, costs off) +select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[)'); + QUERY PLAN +---------------------------------------------------------------------------------------- + Result + Output: ((CURRENT_DATE >= '09-25-2002'::date) AND (CURRENT_DATE < 'infinity'::date)) +(2 rows) + +-- upper range "Infinity" included +explain (verbose, costs off) +select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[]'); + QUERY PLAN +----------------------------------------------------------------------------------------- + Result + Output: ((CURRENT_DATE >= '09-25-2002'::date) AND (CURRENT_DATE <= 'infinity'::date)) +(2 rows) + +-- should also work if we use "@>" +explain (verbose, costs off) +select daterange('-Infinity', '1997-04-10'::date, '()') @> current_date; + QUERY PLAN +---------------------------------------------------------------------------------------- + Result + Output: ((CURRENT_DATE > '-infinity'::date) AND (CURRENT_DATE < '04-10-1997'::date)) +(2 rows) + +explain (verbose, costs off) +select daterange('2002-09-25'::date, 'Infinity', '[]') @> current_date; + QUERY PLAN +----------------------------------------------------------------------------------------- + Result + Output: ((CURRENT_DATE >= '09-25-2002'::date) AND (CURRENT_DATE <= 'infinity'::date)) +(2 rows) + +-- Check that volatile cases are not optimized +explain (verbose, costs off) +select now() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00'); + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------- + Result + Output: ((now() >= 'Sat Jan 20 00:00:00 2024 PST'::timestamp with time zone) AND (now() < 'Sun Jan 21 00:00:00 2024 PST'::timestamp with time zone)) +(2 rows) + +explain (verbose, costs off) -- unsafe! +select clock_timestamp() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00'); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Result + Output: (clock_timestamp() <@ '["Sat Jan 20 00:00:00 2024 PST","Sun Jan 21 00:00:00 2024 PST")'::tstzrange) +(2 rows) + +explain (verbose, costs off) +select clock_timestamp() <@ tstzrange('2024-01-20 00:00', NULL); + QUERY PLAN +------------------------------------------------------------------------------------------- + Result + Output: (clock_timestamp() >= 'Sat Jan 20 00:00:00 2024 PST'::timestamp with time zone) +(2 rows) + +-- test a custom range type with a non-default operator class +create type textrange_supp as range ( + subtype = text, + subtype_opclass = text_pattern_ops +); +create temp table text_support_test (t text collate "C"); +insert into text_support_test values ('a'), ('c'), ('d'), ('ch'); +explain (costs off) +select * from text_support_test where t <@ textrange_supp('a', 'd'); + QUERY PLAN +------------------------------------------------------ + Seq Scan on text_support_test + Filter: ((t ~>=~ 'a'::text) AND (t ~<~ 'd'::text)) +(2 rows) + +select * from text_support_test where t <@ textrange_supp('a', 'd'); + t +---- + a + c + ch +(3 rows) + +drop table text_support_test; +drop type textrange_supp; diff --git a/src/test/regress/sql/rangetypes.sql b/src/test/regress/sql/rangetypes.sql index c5dbe0c04f..a5ecdf5372 100644 --- a/src/test/regress/sql/rangetypes.sql +++ b/src/test/regress/sql/rangetypes.sql @@ -629,3 +629,72 @@ create function inoutparam_fail(inout i anyelement, out r anyrange) --should fail create function table_fail(i anyelement) returns table(i anyelement, r anyrange) as $$ select $1, '[1,10]' $$ language sql; + +-- +-- Test support functions +-- + +-- empty range +explain (verbose, costs off) +select current_date <@ daterange 'empty'; + +-- unbounded range +explain (verbose, costs off) +select current_date <@ daterange(NULL, NULL); + +-- only lower bound present +explain (verbose, costs off) +select current_date <@ daterange('2000-01-01', NULL, '[)'); + +-- only upper bound present +explain (verbose, costs off) +select current_date <@ daterange(NULL, '2000-01-01', '(]'); + +-- lower range "-Infinity" excluded +explain (verbose, costs off) +select current_date <@ daterange('-Infinity', '1997-04-10'::date, '()'); + +-- lower range "-Infinity" included +explain (verbose, costs off) +select current_date <@ daterange('-Infinity', '1997-04-10'::date, '[)'); + +-- upper range "Infinity" excluded +explain (verbose, costs off) +select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[)'); + +-- upper range "Infinity" included +explain (verbose, costs off) +select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[]'); + +-- should also work if we use "@>" +explain (verbose, costs off) +select daterange('-Infinity', '1997-04-10'::date, '()') @> current_date; + +explain (verbose, costs off) +select daterange('2002-09-25'::date, 'Infinity', '[]') @> current_date; + +-- Check that volatile cases are not optimized +explain (verbose, costs off) +select now() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00'); +explain (verbose, costs off) -- unsafe! +select clock_timestamp() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00'); +explain (verbose, costs off) +select clock_timestamp() <@ tstzrange('2024-01-20 00:00', NULL); + +-- test a custom range type with a non-default operator class +create type textrange_supp as range ( + subtype = text, + subtype_opclass = text_pattern_ops +); + +create temp table text_support_test (t text collate "C"); + +insert into text_support_test values ('a'), ('c'), ('d'), ('ch'); + +explain (costs off) +select * from text_support_test where t <@ textrange_supp('a', 'd'); +select * from text_support_test where t <@ textrange_supp('a', 'd'); + +drop table text_support_test; + +drop type textrange_supp; -- 2.30.2