Add a planner support function for starts_with().
author: Tom Lane <tgl@sss.pgh.pa.us>
Wed, 17 Nov 2021 21:54:12 +0000 (16:54 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Wed, 17 Nov 2021 21:54:12 +0000 (16:54 -0500)
This fills in some gaps in planner support for starts_with() and
the equivalent ^@ operator:

* A condition such as "textcol ^@ constant" can now use a regular
btree index, not only an SP-GiST index, so long as the index's
collation is C.  (This works just like "textcol LIKE 'foo%'".)

* "starts_with(textcol, constant)" can be optimized the same as
"textcol ^@ constant".

* Fixed-prefix LIKE and regex patterns are now more like starts_with()
in another way: if you apply one to an SP-GiST-indexed column, you'll
get an index condition using ^@ rather than two index conditions with
>= and <.

Per a complaint from Shay Rojansky.  Patch by me; thanks to
Nathan Bossart for review.

Discussion: https://postgr.es/m/232599.1633800229@sss.pgh.pa.us

src/backend/utils/adt/like_support.c
src/include/catalog/catversion.h
src/include/catalog/pg_operator.dat
src/include/catalog/pg_proc.dat
src/test/regress/expected/create_index_spgist.out
src/test/regress/sql/create_index_spgist.sql

index 241e6f0f598ae2f5ddd12bb5e804c594389c89dd..988568825e086c337eecd3e94439e43608bb7d37 100644 (file)
@@ -143,6 +143,14 @@ texticregexeq_support(PG_FUNCTION_ARGS)
        PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
 }
 
+Datum
+text_starts_with_support(PG_FUNCTION_ARGS)
+{
+       Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+       PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
+}
+
 /* Common code for the above */
 static Node *
 like_regex_support(Node *rawreq, Pattern_Type ptype)
@@ -246,6 +254,7 @@ match_pattern_prefix(Node *leftop,
        Oid                     eqopr;
        Oid                     ltopr;
        Oid                     geopr;
+       Oid                     preopr = InvalidOid;
        bool            collation_aware;
        Expr       *expr;
        FmgrInfo        ltproc;
@@ -302,12 +311,20 @@ match_pattern_prefix(Node *leftop,
        switch (ldatatype)
        {
                case TEXTOID:
-                       if (opfamily == TEXT_PATTERN_BTREE_FAM_OID ||
-                               opfamily == TEXT_SPGIST_FAM_OID)
+                       if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
+                       {
+                               eqopr = TextEqualOperator;
+                               ltopr = TextPatternLessOperator;
+                               geopr = TextPatternGreaterEqualOperator;
+                               collation_aware = false;
+                       }
+                       else if (opfamily == TEXT_SPGIST_FAM_OID)
                        {
                                eqopr = TextEqualOperator;
                                ltopr = TextPatternLessOperator;
                                geopr = TextPatternGreaterEqualOperator;
+                               /* This opfamily has direct support for prefixing */
+                               preopr = TextPrefixOperator;
                                collation_aware = false;
                        }
                        else
@@ -360,20 +377,6 @@ match_pattern_prefix(Node *leftop,
                        return NIL;
        }
 
-       /*
-        * If necessary, verify that the index's collation behavior is compatible.
-        * For an exact-match case, we don't have to be picky.  Otherwise, insist
-        * that the index collation be "C".  Note that here we are looking at the
-        * index's collation, not the expression's collation -- this test is *not*
-        * dependent on the LIKE/regex operator's collation.
-        */
-       if (collation_aware)
-       {
-               if (!(pstatus == Pattern_Prefix_Exact ||
-                         lc_collate_is_c(indexcollation)))
-                       return NIL;
-       }
-
        /*
         * If necessary, coerce the prefix constant to the right type.  The given
         * prefix constant is either text or bytea type, therefore the only case
@@ -409,8 +412,31 @@ match_pattern_prefix(Node *leftop,
        }
 
        /*
-        * Otherwise, we have a nonempty required prefix of the values.
-        *
+        * Otherwise, we have a nonempty required prefix of the values.  Some
+        * opclasses support prefix checks directly, otherwise we'll try to
+        * generate a range constraint.
+        */
+       if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
+       {
+               expr = make_opclause(preopr, BOOLOID, false,
+                                                        (Expr *) leftop, (Expr *) prefix,
+                                                        InvalidOid, indexcollation);
+               result = list_make1(expr);
+               return result;
+       }
+
+       /*
+        * Since we need a range constraint, it's only going to work reliably if
+        * the index is collation-insensitive or has "C" collation.  Note that
+        * here we are looking at the index's collation, not the expression's
+        * collation -- this test is *not* dependent on the LIKE/regex operator's
+        * collation.
+        */
+       if (collation_aware &&
+               !lc_collate_is_c(indexcollation))
+               return NIL;
+
+       /*
         * We can always say "x >= prefix".
         */
        if (!op_in_opfamily(geopr, opfamily))
@@ -1165,7 +1191,6 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
                case Pattern_Type_Prefix:
                        /* Prefix type work is trivial.  */
                        result = Pattern_Prefix_Partial;
-                       *rest_selec = 1.0;      /* all */
                        *prefix = makeConst(patt->consttype,
                                                                patt->consttypmod,
                                                                patt->constcollid,
@@ -1175,6 +1200,8 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
                                                                                  patt->constlen),
                                                                patt->constisnull,
                                                                patt->constbyval);
+                       if (rest_selec != NULL)
+                               *rest_selec = 1.0;      /* all */
                        break;
                default:
                        elog(ERROR, "unrecognized ptype: %d", (int) ptype);
index 49e8e5912907b862e460092c54cbf4820aa7c8d0..cb7117df3ea2e3c5316d7763d12356391d1eb6b7 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     202111091
+#define CATALOG_VERSION_NO     202111171
 
 #endif
index 89c73acd680772cc5256529a970153db14d9cefd..0075a02f32374e614b1144c22641e791050de783 100644 (file)
   oprright => 'text', oprresult => 'bool', oprcom => '=(text,text)',
   oprnegate => '<>(text,text)', oprcode => 'texteq', oprrest => 'eqsel',
   oprjoin => 'eqjoinsel' },
-{ oid => '3877', descr => 'starts with',
+{ oid => '3877', oid_symbol => 'TextPrefixOperator', descr => 'starts with',
   oprname => '^@', oprleft => 'text', oprright => 'text', oprresult => 'bool',
   oprcode => 'starts_with', oprrest => 'prefixsel',
   oprjoin => 'prefixjoinsel' },
index d068d6532ecfd357e3d4c39de83c6293e8712966..6412f369f18f68b9029ccfb424173c334420f6d5 100644 (file)
   proname => 'texteq', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'text text', prosrc => 'texteq' },
 { oid => '3696',
-  proname => 'starts_with', proleakproof => 't', prorettype => 'bool',
-  proargtypes => 'text text', prosrc => 'text_starts_with' },
+  proname => 'starts_with', prosupport => 'text_starts_with_support',
+  proleakproof => 't', prorettype => 'bool', proargtypes => 'text text',
+  prosrc => 'text_starts_with' },
+{ oid => '8923', descr => 'planner support for text_starts_with',
+  proname => 'text_starts_with_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'text_starts_with_support' },
 { oid => '68',
   proname => 'xideq', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'xid xid', prosrc => 'xideq' },
index f4fb08a289ac7b6181b2c96f8c8d316219050b1e..5c04df9c01b14fc2dcc60d8ea7c2463f686575e2 100644 (file)
@@ -804,6 +804,22 @@ SELECT count(*) FROM radix_text_tbl WHERE t ^@      'Worth';
      2
 (1 row)
 
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Aggregate
+   ->  Index Only Scan using sp_radix_ind on radix_text_tbl
+         Index Cond: (t ^@ 'Worth'::text)
+         Filter: starts_with(t, 'Worth'::text)
+(4 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
+ count 
+-------
+     2
+(1 row)
+
 -- Now check the results from bitmap indexscan
 SET enable_seqscan = OFF;
 SET enable_indexscan = OFF;
@@ -1333,6 +1349,23 @@ SELECT count(*) FROM radix_text_tbl WHERE t ^@    'Worth';
      2
 (1 row)
 
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
+                   QUERY PLAN                   
+------------------------------------------------
+ Aggregate
+   ->  Bitmap Heap Scan on radix_text_tbl
+         Filter: starts_with(t, 'Worth'::text)
+         ->  Bitmap Index Scan on sp_radix_ind
+               Index Cond: (t ^@ 'Worth'::text)
+(5 rows)
+
+SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
+ count 
+-------
+     2
+(1 row)
+
 RESET enable_seqscan;
 RESET enable_indexscan;
 RESET enable_bitmapscan;
index b126dae6299ef11edbc15109686e3db9622ec011..660bfc619300d5901bd51621291b510adf1f39fb 100644 (file)
@@ -295,6 +295,10 @@ EXPLAIN (COSTS OFF)
 SELECT count(*) FROM radix_text_tbl WHERE t ^@  'Worth';
 SELECT count(*) FROM radix_text_tbl WHERE t ^@  'Worth';
 
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
+SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
+
 -- Now check the results from bitmap indexscan
 SET enable_seqscan = OFF;
 SET enable_indexscan = OFF;
@@ -424,6 +428,10 @@ EXPLAIN (COSTS OFF)
 SELECT count(*) FROM radix_text_tbl WHERE t ^@  'Worth';
 SELECT count(*) FROM radix_text_tbl WHERE t ^@  'Worth';
 
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
+SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
+
 RESET enable_seqscan;
 RESET enable_indexscan;
 RESET enable_bitmapscan;