Add planner support functions for range operators <@ and @>.
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 20 Jan 2024 18:57:54 +0000 (13:57 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 20 Jan 2024 18:57:54 +0000 (13:57 -0500)
These support functions will transform expressions with constant
range values into direct comparisons on the range bound values,
which are frequently better-optimizable.  The transformation is
skipped however if it would require double evaluation of a
volatile or expensive element expression.

Along the way, add the range opfamily OID to range typcache entries,
since load_rangetype_info has to compute that anyway and it seems
silly to duplicate the work later.

Kim Johan Andersson and Jian He, reviewed by Laurenz Albe

Discussion: https://postgr.es/m/94f64d1f-b8c0-b0c5-98bc-0793a34e0851@kimmet.dk

src/backend/utils/adt/rangetypes.c
src/backend/utils/adt/rangetypes_selfuncs.c
src/backend/utils/cache/typcache.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.dat
src/include/utils/typcache.h
src/test/regress/expected/rangetypes.out
src/test/regress/sql/rangetypes.sql

index d99b00b5908f09491c3cab872598cadbb91489f1..2d94a6b877456d8799b554786b6e9150d49232cc 100644 (file)
  */
 #include "postgres.h"
 
-#include "access/tupmacs.h"
 #include "common/hashfn.h"
-#include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
+#include "nodes/makefuncs.h"
 #include "nodes/miscnodes.h"
-#include "port/pg_bitutils.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
 #include "utils/builtins.h"
 #include "utils/date.h"
 #include "utils/lsyscache.h"
 #include "utils/rangetypes.h"
 #include "utils/timestamp.h"
-#include "varatt.h"
 
 
 /* fn_extra cache entry for one of the range I/O functions */
@@ -69,6 +70,12 @@ static Size datum_compute_size(Size data_length, Datum val, bool typbyval,
                                                           char typalign, int16 typlen, char typstorage);
 static Pointer datum_write(Pointer ptr, Datum datum, bool typbyval,
                                                   char typalign, int16 typlen, char typstorage);
+static Node *find_simplified_clause(PlannerInfo *root,
+                                                                       Expr *rangeExpr, Expr *elemExpr);
+static Expr *build_bound_expr(Expr *elemExpr, Datum val,
+                                                         bool isLowerBound, bool isInclusive,
+                                                         TypeCacheEntry *typeCache,
+                                                         Oid opfamily, Oid rng_collation);
 
 
 /*
@@ -2173,6 +2180,58 @@ make_empty_range(TypeCacheEntry *typcache)
        return make_range(typcache, &lower, &upper, true, NULL);
 }
 
+/*
+ * Planner support function for elem_contained_by_range (<@ operator).
+ */
+Datum
+elem_contained_by_range_support(PG_FUNCTION_ARGS)
+{
+       Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+       Node       *ret = NULL;
+
+       if (IsA(rawreq, SupportRequestSimplify))
+       {
+               SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
+               FuncExpr   *fexpr = req->fcall;
+               Expr       *leftop,
+                                  *rightop;
+
+               Assert(list_length(fexpr->args) == 2);
+               leftop = linitial(fexpr->args);
+               rightop = lsecond(fexpr->args);
+
+               ret = find_simplified_clause(req->root, rightop, leftop);
+       }
+
+       PG_RETURN_POINTER(ret);
+}
+
+/*
+ * Planner support function for range_contains_elem (@> operator).
+ */
+Datum
+range_contains_elem_support(PG_FUNCTION_ARGS)
+{
+       Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+       Node       *ret = NULL;
+
+       if (IsA(rawreq, SupportRequestSimplify))
+       {
+               SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
+               FuncExpr   *fexpr = req->fcall;
+               Expr       *leftop,
+                                  *rightop;
+
+               Assert(list_length(fexpr->args) == 2);
+               leftop = linitial(fexpr->args);
+               rightop = lsecond(fexpr->args);
+
+               ret = find_simplified_clause(req->root, leftop, rightop);
+       }
+
+       PG_RETURN_POINTER(ret);
+}
+
 
 /*
  *----------------------------------------------------------
@@ -2715,3 +2774,180 @@ datum_write(Pointer ptr, Datum datum, bool typbyval, char typalign,
 
        return ptr;
 }
+
+/*
+ * Common code for the elem_contained_by_range and range_contains_elem
+ * support functions.  The caller has extracted the function argument
+ * expressions, and swapped them if necessary to pass the range first.
+ *
+ * Returns a simplified replacement expression, or NULL if we can't simplify.
+ */
+static Node *
+find_simplified_clause(PlannerInfo *root, Expr *rangeExpr, Expr *elemExpr)
+{
+       RangeType  *range;
+       TypeCacheEntry *rangetypcache;
+       RangeBound      lower;
+       RangeBound      upper;
+       bool            empty;
+
+       /* can't do anything unless the range is a non-null constant */
+       if (!IsA(rangeExpr, Const) || ((Const *) rangeExpr)->constisnull)
+               return NULL;
+       range = DatumGetRangeTypeP(((Const *) rangeExpr)->constvalue);
+
+       rangetypcache = lookup_type_cache(RangeTypeGetOid(range),
+                                                                         TYPECACHE_RANGE_INFO);
+       if (rangetypcache->rngelemtype == NULL)
+               elog(ERROR, "type %u is not a range type", RangeTypeGetOid(range));
+
+       range_deserialize(rangetypcache, range, &lower, &upper, &empty);
+
+       if (empty)
+       {
+               /* if the range is empty, then there can be no matches */
+               return makeBoolConst(false, false);
+       }
+       else if (lower.infinite && upper.infinite)
+       {
+               /* the range has infinite bounds, so it matches everything */
+               return makeBoolConst(true, false);
+       }
+       else
+       {
+               /* at least one bound is available, we have something to work with */
+               TypeCacheEntry *elemTypcache = rangetypcache->rngelemtype;
+               Oid                     opfamily = rangetypcache->rng_opfamily;
+               Oid                     rng_collation = rangetypcache->rng_collation;
+               Expr       *lowerExpr = NULL;
+               Expr       *upperExpr = NULL;
+
+               if (!lower.infinite && !upper.infinite)
+               {
+                       /*
+                        * When both bounds are present, we have a problem: the
+                        * "simplified" clause would need to evaluate the elemExpr twice.
+                        * That's definitely not okay if the elemExpr is volatile, and
+                        * it's also unattractive if the elemExpr is expensive.
+                        */
+                       QualCost        eval_cost;
+
+                       if (contain_volatile_functions((Node *) elemExpr))
+                               return NULL;
+
+                       /*
+                        * We define "expensive" as "contains any subplan or more than 10
+                        * operators".  Note that the subplan search has to be done
+                        * explicitly, since cost_qual_eval() will barf on unplanned
+                        * subselects.
+                        */
+                       if (contain_subplans((Node *) elemExpr))
+                               return NULL;
+                       cost_qual_eval_node(&eval_cost, (Node *) elemExpr, root);
+                       if (eval_cost.startup + eval_cost.per_tuple >
+                               10 * cpu_operator_cost)
+                               return NULL;
+               }
+
+               /* Okay, try to build boundary comparison expressions */
+               if (!lower.infinite)
+               {
+                       lowerExpr = build_bound_expr(elemExpr,
+                                                                                lower.val,
+                                                                                true,
+                                                                                lower.inclusive,
+                                                                                elemTypcache,
+                                                                                opfamily,
+                                                                                rng_collation);
+                       if (lowerExpr == NULL)
+                               return NULL;
+               }
+
+               if (!upper.infinite)
+               {
+                       /* Copy the elemExpr if we need two copies */
+                       if (!lower.infinite)
+                               elemExpr = copyObject(elemExpr);
+                       upperExpr = build_bound_expr(elemExpr,
+                                                                                upper.val,
+                                                                                false,
+                                                                                upper.inclusive,
+                                                                                elemTypcache,
+                                                                                opfamily,
+                                                                                rng_collation);
+                       if (upperExpr == NULL)
+                               return NULL;
+               }
+
+               if (lowerExpr != NULL && upperExpr != NULL)
+                       return (Node *) make_andclause(list_make2(lowerExpr, upperExpr));
+               else if (lowerExpr != NULL)
+                       return (Node *) lowerExpr;
+               else if (upperExpr != NULL)
+                       return (Node *) upperExpr;
+               else
+               {
+                       Assert(false);
+                       return NULL;
+               }
+       }
+}
+
+/*
+ * Helper function for find_simplified_clause().
+ *
+ * Build the expression (elemExpr Operator val), where the operator is
+ * the appropriate member of the given opfamily depending on
+ * isLowerBound and isInclusive.  typeCache is the typcache entry for
+ * the "val" value (presently, this will be the same type as elemExpr).
+ * rng_collation is the collation to use in the comparison.
+ *
+ * Return NULL on failure (if, for some reason, we can't find the operator).
+ */
+static Expr *
+build_bound_expr(Expr *elemExpr, Datum val,
+                                bool isLowerBound, bool isInclusive,
+                                TypeCacheEntry *typeCache,
+                                Oid opfamily, Oid rng_collation)
+{
+       Oid                     elemType = typeCache->type_id;
+       int16           elemTypeLen = typeCache->typlen;
+       bool            elemByValue = typeCache->typbyval;
+       Oid                     elemCollation = typeCache->typcollation;
+       int16           strategy;
+       Oid                     oproid;
+       Expr       *constExpr;
+
+       /* Identify the comparison operator to use */
+       if (isLowerBound)
+               strategy = isInclusive ? BTGreaterEqualStrategyNumber : BTGreaterStrategyNumber;
+       else
+               strategy = isInclusive ? BTLessEqualStrategyNumber : BTLessStrategyNumber;
+
+       /*
+        * We could use exprType(elemExpr) here, if it ever becomes possible that
+        * elemExpr is not the exact same type as the range elements.
+        */
+       oproid = get_opfamily_member(opfamily, elemType, elemType, strategy);
+
+       /* We don't really expect failure here, but just in case ... */
+       if (!OidIsValid(oproid))
+               return NULL;
+
+       /* OK, convert "val" to a full-fledged Const node, and make the OpExpr */
+       constExpr = (Expr *) makeConst(elemType,
+                                                                  -1,
+                                                                  elemCollation,
+                                                                  elemTypeLen,
+                                                                  val,
+                                                                  false,
+                                                                  elemByValue);
+
+       return make_opclause(oproid,
+                                                BOOLOID,
+                                                false,
+                                                elemExpr,
+                                                constExpr,
+                                                InvalidOid,
+                                                rng_collation);
+}
index c260012bd010795d5acad3a63e6a79674c8dd883..3431c3cd982af359320e035d516660ff2d0b689f 100644 (file)
@@ -196,9 +196,10 @@ rangesel(PG_FUNCTION_ARGS)
        else if (operator == OID_RANGE_ELEM_CONTAINED_OP)
        {
                /*
-                * Here, the Var is the elem, not the range.  For now we just punt and
-                * return the default estimate.  In future we could disassemble the
-                * range constant and apply scalarineqsel ...
+                * Here, the Var is the elem, not the range.  In typical cases
+                * elem_contained_by_range_support will have simplified this case, so
+                * that we won't get here.  If we do get here we'll fall back on a
+                * default estimate.
                 */
        }
        else if (((Const *) other)->consttype == vardata.vartype)
index 4625be4d5c5cd48be1567458fd7250172cab1b13..84fc83cb11f9192e74b445f7eff9c0c681f15500 100644 (file)
@@ -940,6 +940,7 @@ load_rangetype_info(TypeCacheEntry *typentry)
        /* get opclass properties and look up the comparison function */
        opfamilyOid = get_opclass_family(opclassOid);
        opcintype = get_opclass_input_type(opclassOid);
+       typentry->rng_opfamily = opfamilyOid;
 
        cmpFnOid = get_opfamily_proc(opfamilyOid, opcintype, opcintype,
                                                                 BTORDER_PROC);
index 6496b3ffc9f597493d7449a015ff572b64511451..6cbe33b6fa876699b5d0e548098bac92cdbc87d5 100644 (file)
@@ -57,6 +57,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     202401191
+#define CATALOG_VERSION_NO     202401201
 
 #endif
index a0277e57c7d68d3ac7b3755c5c1c01afb98b576e..ad74e07dbbdc2b0af65bfa936d3f62408dd5cd45 100644 (file)
   proname => 'range_overlaps', prorettype => 'bool',
   proargtypes => 'anyrange anyrange', prosrc => 'range_overlaps' },
 { oid => '3858',
-  proname => 'range_contains_elem', prorettype => 'bool',
-  proargtypes => 'anyrange anyelement', prosrc => 'range_contains_elem' },
+  proname => 'range_contains_elem', prosupport => 'range_contains_elem_support',
+  prorettype => 'bool', proargtypes => 'anyrange anyelement',
+  prosrc => 'range_contains_elem' },
 { oid => '3859',
   proname => 'range_contains', prorettype => 'bool',
   proargtypes => 'anyrange anyrange', prosrc => 'range_contains' },
 { oid => '3860',
-  proname => 'elem_contained_by_range', prorettype => 'bool',
+  proname => 'elem_contained_by_range',
+  prosupport => 'elem_contained_by_range_support', prorettype => 'bool',
   proargtypes => 'anyelement anyrange', prosrc => 'elem_contained_by_range' },
 { oid => '3861',
   proname => 'range_contained_by', prorettype => 'bool',
 { oid => '3867',
   proname => 'range_union', prorettype => 'anyrange',
   proargtypes => 'anyrange anyrange', prosrc => 'range_union' },
+{ oid => '9998', descr => 'planner support for range_contains_elem',
+  proname => 'range_contains_elem_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'range_contains_elem_support' },
+{ oid => '9999', descr => 'planner support for elem_contained_by_range',
+  proname => 'elem_contained_by_range_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'elem_contained_by_range_support' },
 { oid => '4057',
   descr => 'the smallest range which includes both of the given ranges',
   proname => 'range_merge', prorettype => 'anyrange',
index b72daa7fb443f1c43cc80bda2b6b48dace967970..f506cc4aa35d47698c8c3340c5225d060eaf72d8 100644 (file)
@@ -96,6 +96,7 @@ typedef struct TypeCacheEntry
         * btree comparison function.
         */
        struct TypeCacheEntry *rngelemtype; /* range's element type */
+       Oid                     rng_opfamily;   /* opfamily to use for range comparisons */
        Oid                     rng_collation;  /* collation for comparisons, if any */
        FmgrInfo        rng_cmp_proc_finfo; /* comparison function */
        FmgrInfo        rng_canonical_finfo;    /* canonicalization function, if any */
index 07d5621ef873ef46433ebe8a0ec103646c4def72..a7cc220bf0d64fbb44076c650125c719ea2bc1c5 100644 (file)
@@ -1834,3 +1834,145 @@ create function table_fail(i anyelement) returns table(i anyelement, r anyrange)
   as $$ select $1, '[1,10]' $$ language sql;
 ERROR:  cannot determine result data type
 DETAIL:  A result of type anyrange requires at least one input of type anyrange or anymultirange.
+--
+-- Test support functions
+--
+-- empty range
+explain (verbose, costs off)
+select current_date <@ daterange 'empty';
+   QUERY PLAN    
+-----------------
+ Result
+   Output: false
+(2 rows)
+
+-- unbounded range
+explain (verbose, costs off)
+select current_date <@ daterange(NULL, NULL);
+   QUERY PLAN   
+----------------
+ Result
+   Output: true
+(2 rows)
+
+-- only lower bound present
+explain (verbose, costs off)
+select current_date <@ daterange('2000-01-01', NULL, '[)');
+                   QUERY PLAN                   
+------------------------------------------------
+ Result
+   Output: (CURRENT_DATE >= '01-01-2000'::date)
+(2 rows)
+
+-- only upper bound present
+explain (verbose, costs off)
+select current_date <@ daterange(NULL, '2000-01-01', '(]');
+                  QUERY PLAN                   
+-----------------------------------------------
+ Result
+   Output: (CURRENT_DATE < '01-02-2000'::date)
+(2 rows)
+
+-- lower range "-Infinity" excluded
+explain (verbose, costs off)
+select current_date <@ daterange('-Infinity', '1997-04-10'::date, '()');
+                                       QUERY PLAN                                       
+----------------------------------------------------------------------------------------
+ Result
+   Output: ((CURRENT_DATE > '-infinity'::date) AND (CURRENT_DATE < '04-10-1997'::date))
+(2 rows)
+
+-- lower range "-Infinity" included
+explain (verbose, costs off)
+select current_date <@ daterange('-Infinity', '1997-04-10'::date, '[)');
+                                       QUERY PLAN                                        
+-----------------------------------------------------------------------------------------
+ Result
+   Output: ((CURRENT_DATE >= '-infinity'::date) AND (CURRENT_DATE < '04-10-1997'::date))
+(2 rows)
+
+-- upper range "Infinity" excluded
+explain (verbose, costs off)
+select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[)');
+                                       QUERY PLAN                                       
+----------------------------------------------------------------------------------------
+ Result
+   Output: ((CURRENT_DATE >= '09-25-2002'::date) AND (CURRENT_DATE < 'infinity'::date))
+(2 rows)
+
+-- upper range "Infinity" included
+explain (verbose, costs off)
+select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[]');
+                                       QUERY PLAN                                        
+-----------------------------------------------------------------------------------------
+ Result
+   Output: ((CURRENT_DATE >= '09-25-2002'::date) AND (CURRENT_DATE <= 'infinity'::date))
+(2 rows)
+
+-- should also work if we use "@>"
+explain (verbose, costs off)
+select daterange('-Infinity', '1997-04-10'::date, '()') @> current_date;
+                                       QUERY PLAN                                       
+----------------------------------------------------------------------------------------
+ Result
+   Output: ((CURRENT_DATE > '-infinity'::date) AND (CURRENT_DATE < '04-10-1997'::date))
+(2 rows)
+
+explain (verbose, costs off)
+select daterange('2002-09-25'::date, 'Infinity', '[]') @> current_date;
+                                       QUERY PLAN                                        
+-----------------------------------------------------------------------------------------
+ Result
+   Output: ((CURRENT_DATE >= '09-25-2002'::date) AND (CURRENT_DATE <= 'infinity'::date))
+(2 rows)
+
+-- Check that volatile cases are not optimized
+explain (verbose, costs off)
+select now() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00');
+                                                                       QUERY PLAN                                                                       
+--------------------------------------------------------------------------------------------------------------------------------------------------------
+ Result
+   Output: ((now() >= 'Sat Jan 20 00:00:00 2024 PST'::timestamp with time zone) AND (now() < 'Sun Jan 21 00:00:00 2024 PST'::timestamp with time zone))
+(2 rows)
+
+explain (verbose, costs off)  -- unsafe!
+select clock_timestamp() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00');
+                                                  QUERY PLAN                                                   
+---------------------------------------------------------------------------------------------------------------
+ Result
+   Output: (clock_timestamp() <@ '["Sat Jan 20 00:00:00 2024 PST","Sun Jan 21 00:00:00 2024 PST")'::tstzrange)
+(2 rows)
+
+explain (verbose, costs off)
+select clock_timestamp() <@ tstzrange('2024-01-20 00:00', NULL);
+                                        QUERY PLAN                                         
+-------------------------------------------------------------------------------------------
+ Result
+   Output: (clock_timestamp() >= 'Sat Jan 20 00:00:00 2024 PST'::timestamp with time zone)
+(2 rows)
+
+-- test a custom range type with a non-default operator class
+create type textrange_supp as range (
+   subtype = text,
+   subtype_opclass = text_pattern_ops
+);
+create temp table text_support_test (t text collate "C");
+insert into text_support_test values ('a'), ('c'), ('d'), ('ch');
+explain (costs off)
+select * from text_support_test where t <@ textrange_supp('a', 'd');
+                      QUERY PLAN                      
+------------------------------------------------------
+ Seq Scan on text_support_test
+   Filter: ((t ~>=~ 'a'::text) AND (t ~<~ 'd'::text))
+(2 rows)
+
+select * from text_support_test where t <@ textrange_supp('a', 'd');
+ t  
+----
+ a
+ c
+ ch
+(3 rows)
+
+drop table text_support_test;
+drop type textrange_supp;
index c5dbe0c04f1bb6f503a9ce80fd536b6ece853ec5..a5ecdf5372f5d245d5824176d9dd1bbfe03ec606 100644 (file)
@@ -629,3 +629,72 @@ create function inoutparam_fail(inout i anyelement, out r anyrange)
 --should fail
 create function table_fail(i anyelement) returns table(i anyelement, r anyrange)
   as $$ select $1, '[1,10]' $$ language sql;
+
+--
+-- Test support functions
+--
+
+-- empty range
+explain (verbose, costs off)
+select current_date <@ daterange 'empty';
+
+-- unbounded range
+explain (verbose, costs off)
+select current_date <@ daterange(NULL, NULL);
+
+-- only lower bound present
+explain (verbose, costs off)
+select current_date <@ daterange('2000-01-01', NULL, '[)');
+
+-- only upper bound present
+explain (verbose, costs off)
+select current_date <@ daterange(NULL, '2000-01-01', '(]');
+
+-- lower range "-Infinity" excluded
+explain (verbose, costs off)
+select current_date <@ daterange('-Infinity', '1997-04-10'::date, '()');
+
+-- lower range "-Infinity" included
+explain (verbose, costs off)
+select current_date <@ daterange('-Infinity', '1997-04-10'::date, '[)');
+
+-- upper range "Infinity" excluded
+explain (verbose, costs off)
+select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[)');
+
+-- upper range "Infinity" included
+explain (verbose, costs off)
+select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[]');
+
+-- should also work if we use "@>"
+explain (verbose, costs off)
+select daterange('-Infinity', '1997-04-10'::date, '()') @> current_date;
+
+explain (verbose, costs off)
+select daterange('2002-09-25'::date, 'Infinity', '[]') @> current_date;
+
+-- Check that volatile cases are not optimized
+explain (verbose, costs off)
+select now() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00');
+explain (verbose, costs off)  -- unsafe!
+select clock_timestamp() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00');
+explain (verbose, costs off)
+select clock_timestamp() <@ tstzrange('2024-01-20 00:00', NULL);
+
+-- test a custom range type with a non-default operator class
+create type textrange_supp as range (
+   subtype = text,
+   subtype_opclass = text_pattern_ops
+);
+
+create temp table text_support_test (t text collate "C");
+
+insert into text_support_test values ('a'), ('c'), ('d'), ('ch');
+
+explain (costs off)
+select * from text_support_test where t <@ textrange_supp('a', 'd');
+select * from text_support_test where t <@ textrange_supp('a', 'd');
+
+drop table text_support_test;
+
+drop type textrange_supp;