Implement regexp_match(), a simplified alternative to regexp_matches().

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 17 Aug 2016 22:32:56 +0000 (18:32 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 17 Aug 2016 22:33:01 +0000 (18:33 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 17 Aug 2016 22:32:56 +0000 (18:32 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 17 Aug 2016 22:33:01 +0000 (18:33 -0400)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml

index 426e562b03680d999313a73ea5c02280e251d009..169a385a9cc9decdfadfde16a86ec95eaea7e397 100644 (file)
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2036,6 +2036,23 @@
         <entry><literal>'42.5'</literal></entry>
        </row>
  
+      <row>
+       <entry>
+        <indexterm>
+         <primary>regexp_match</primary>
+        </indexterm>
+        <literal><function>regexp_match(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</function></literal>
+       </entry>
+       <entry><type>text[]</type></entry>
+       <entry>
+        Return captured substring(s) resulting from the first match of a POSIX
+        regular expression to the <parameter>string</parameter>. See
+        <xref linkend="functions-posix-regexp"> for more information.
+       </entry>
+       <entry><literal>regexp_match('foobarbequebaz', '(bar)(beque)')</literal></entry>
+       <entry><literal>{bar,beque}</literal></entry>
+      </row>
+
        <row>
         <entry>
          <indexterm>
@@ -2045,12 +2062,12 @@
         </entry>
         <entry><type>setof text[]</type></entry>
         <entry>
-        Return all captured substrings resulting from matching a POSIX regular
-        expression against the <parameter>string</parameter>. See
+        Return captured substring(s) resulting from matching a POSIX regular
+        expression to the <parameter>string</parameter>. See
          <xref linkend="functions-posix-regexp"> for more information.
         </entry>
-       <entry><literal>regexp_matches('foobarbequebaz', '(bar)(beque)')</literal></entry>
-       <entry><literal>{bar,beque}</literal></entry>
+       <entry><literal>regexp_matches('foobarbequebaz', 'ba.', 'g')</literal></entry>
+       <entry><literal>{bar}</literal><para><literal>{baz}</literal></para> (2 rows)</entry>
        </row>
  
        <row>
@@ -4112,6 +4129,9 @@ substring('foobar' from '#"o_b#"%' for '#')    <lineannotation>NULL</lineannotat
     <indexterm>
      <primary>regexp_replace</primary>
     </indexterm>
+   <indexterm>
+    <primary>regexp_match</primary>
+   </indexterm>
     <indexterm>
      <primary>regexp_matches</primary>
     </indexterm>
@@ -4272,64 +4292,106 @@ regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g')
     </para>
  
      <para>
-     The <function>regexp_matches</> function returns a text array of
-     all of the captured substrings resulting from matching a POSIX
-     regular expression pattern.  It has the syntax
-     <function>regexp_matches</function>(<replaceable>string</>, <replaceable>pattern</>
-     <optional>, <replaceable>flags</> </optional>).
-     The function can return no rows, one row, or multiple rows (see
-     the <literal>g</> flag below).  If the <replaceable>pattern</>
-     does not match, the function returns no rows.  If the pattern
-     contains no parenthesized subexpressions, then each row
-     returned is a single-element text array containing the substring
-     matching the whole pattern.  If the pattern contains parenthesized
-     subexpressions, the function returns a text array whose
-     <replaceable>n</>'th element is the substring matching the
-     <replaceable>n</>'th parenthesized subexpression of the pattern
-     (not counting <quote>non-capturing</> parentheses; see below for
-     details).
-     The <replaceable>flags</> parameter is an optional text
-     string containing zero or more single-letter flags that change the
-     function's behavior.  Flag <literal>g</> causes the function to find
-     each match in the string, not only the first one, and return a row for
-     each such match.  Supported flags (though
-     not <literal>g</>)
-     are described in <xref linkend="posix-embedded-options-table">.
+     The <function>regexp_match</> function returns a text array of
+     captured substring(s) resulting from the first match of a POSIX
+     regular expression pattern to a string.  It has the syntax
+     <function>regexp_match</function>(<replaceable>string</>,
+     <replaceable>pattern</> <optional>, <replaceable>flags</> </optional>).
+     If there is no match, the result is <literal>NULL</>.
+     If a match is found, and the <replaceable>pattern</> contains no
+     parenthesized subexpressions, then the result is a single-element text
+     array containing the substring matching the whole pattern.
+     If a match is found, and the <replaceable>pattern</> contains
+     parenthesized subexpressions, then the result is a text array
+     whose <replaceable>n</>'th element is the substring matching
+     the <replaceable>n</>'th parenthesized subexpression of
+     the <replaceable>pattern</> (not counting <quote>non-capturing</>
+     parentheses; see below for details).
+     The <replaceable>flags</> parameter is an optional text string
+     containing zero or more single-letter flags that change the function's
+     behavior.  Supported flags are described
+     in <xref linkend="posix-embedded-options-table">.
      </para>
  
     <para>
      Some examples:
  <programlisting>
-SELECT regexp_matches('foobarbequebaz', '(bar)(beque)');
- regexp_matches 
-----------------
+SELECT regexp_match('foobarbequebaz', 'bar.*que');
+ regexp_match
+--------------
+ {barbeque}
+(1 row)
+
+SELECT regexp_match('foobarbequebaz', '(bar)(beque)');
+ regexp_match
+--------------
   {bar,beque}
  (1 row)
+</programlisting>
+    In the common case where you just want the whole matching substring
+    or <literal>NULL</> for no match, write something like
+<programlisting>
+SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
+ regexp_match
+--------------
+ barbeque
+(1 row)
+</programlisting>
+   </para>
+
+    <para>
+     The <function>regexp_matches</> function returns a set of text arrays
+     of captured substring(s) resulting from matching a POSIX regular
+     expression pattern to a string.  It has the same syntax as
+     <function>regexp_match</function>.
+     This function returns no rows if there is no match, one row if there is
+     a match and the <literal>g</> flag is not given, or <replaceable>N</>
+     rows if there are <replaceable>N</> matches and the <literal>g</> flag
+     is given.  Each returned row is a text array containing the whole
+     matched substring or the substrings matching parenthesized
+     subexpressions of the <replaceable>pattern</>, just as described above
+     for <function>regexp_match</function>.
+     <function>regexp_matches</> accepts all the flags shown
+     in <xref linkend="posix-embedded-options-table">, plus
+     the <literal>g</> flag, which commands it to return all matches, not
+     just the first one.
+    </para>
+
+   <para>
+    Some examples:
+<programlisting>
+SELECT regexp_matches('foo', 'not there');
+ regexp_matches
+----------------
+(0 rows)
  
  SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g');
- regexp_matches 
+ regexp_matches
  ----------------
   {bar,beque}
   {bazil,barf}
  (2 rows)
-
-SELECT regexp_matches('foobarbequebaz', 'barbeque');
- regexp_matches 
-----------------
- {barbeque}
-(1 row)
  </programlisting>
     </para>
  
-   <para>
-    It is possible to force <function>regexp_matches()</> to always
-    return one row by using a sub-select;  this is particularly useful
-    in a <literal>SELECT</> target list when you want all rows
-    returned, even non-matching ones:
+   <tip>
+    <para>
+     In most cases <function>regexp_matches()</> should be used with
+     the <literal>g</> flag, since if you only want the first match, it's
+     easier and more efficient to use <function>regexp_match()</>.
+     However, <function>regexp_match()</> only exists
+     in <productname>PostgreSQL</> version 10 and up.  When working in older
+     versions, a common trick is to place a <function>regexp_matches()</>
+     call in a sub-select, for example:
  <programlisting>
  SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
  </programlisting>
-   </para>
+     This produces a text array if there's a match, or <literal>NULL</> if
+     not, the same as <function>regexp_match()</> would do.  Without the
+     sub-select, this query would produce no output at all for table rows
+     without a match, which is typically not the desired behavior.
+    </para>
+   </tip>
  
      <para>
       The <function>regexp_split_to_table</> function splits a string using a POSIX
@@ -4408,6 +4470,7 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo;
      zero-length matches that occur at the start or end of the string
      or immediately after a previous match.  This is contrary to the strict
      definition of regexp matching that is implemented by
+    <function>regexp_match</> and
      <function>regexp_matches</>, but is usually the most convenient behavior
      in practice.  Other software systems such as Perl use similar definitions.
     </para>
@@ -5482,7 +5545,7 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})');
      into the digits and the parts before and after them.  We might try to
      do that like this:
  <screen>
-SELECT regexp_matches('abc01234xyz', '(.*)(\d+)(.*)');
+SELECT regexp_match('abc01234xyz', '(.*)(\d+)(.*)');
  <lineannotation>Result: </lineannotation><computeroutput>{abc0123,4,xyz}</computeroutput>
  </screen>
      That didn't work: the first <literal>.*</> is greedy so
@@ -5490,14 +5553,14 @@ SELECT regexp_matches('abc01234xyz', '(.*)(\d+)(.*)');
      match at the last possible place, the last digit.  We might try to fix
      that by making it non-greedy:
  <screen>
-SELECT regexp_matches('abc01234xyz', '(.*?)(\d+)(.*)');
+SELECT regexp_match('abc01234xyz', '(.*?)(\d+)(.*)');
  <lineannotation>Result: </lineannotation><computeroutput>{abc,0,""}</computeroutput>
  </screen>
      That didn't work either, because now the RE as a whole is non-greedy
      and so it ends the overall match as soon as possible.  We can get what
      we want by forcing the RE as a whole to be greedy:
  <screen>
-SELECT regexp_matches('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
+SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
  <lineannotation>Result: </lineannotation><computeroutput>{abc,01234,xyz}</computeroutput>
  </screen>
      Controlling the RE's overall greediness separately from its components'
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql

index 18be08fead5762970e7a9718422c6fd0835bd209..00550eb8044f2c410d5cf0d03ab84b0027102721 100644 (file)
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -2068,7 +2068,7 @@ CREATE VIEW triggers AS
             -- XXX strange hacks follow
             CAST(
               CASE WHEN pg_has_role(c.relowner, 'USAGE')
-               THEN (SELECT m[1] FROM regexp_matches(pg_get_triggerdef(t.oid), E'.{35,} WHEN \\((.+)\\) EXECUTE PROCEDURE') AS rm(m) LIMIT 1)
+               THEN (regexp_match(pg_get_triggerdef(t.oid), E'.{35,} WHEN \\((.+)\\) EXECUTE PROCEDURE'))[1]
                 ELSE null END
               AS character_data) AS action_condition,
             CAST(
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c

index 5b216e0b721a28ac60de5483e1665facc6674e75..bc5e34e222bcc752a7153edfc641e6c710e39cd7 100644 (file)
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -47,7 +47,7 @@ typedef struct pg_re_flags
         bool            glob;                   /* do it globally (for each occurrence) */
  } pg_re_flags;
  
-/* cross-call state for regexp_matches(), also regexp_split() */
+/* cross-call state for regexp_match and regexp_split functions */
  typedef struct regexp_matches_ctx
  {
         text       *orig_str;           /* data string in original TEXT form */
@@ -57,7 +57,7 @@ typedef struct regexp_matches_ctx
         /* so the number of entries in match_locs is nmatches * npatterns * 2 */
         int                *match_locs;         /* 0-based character indexes */
         int                     next_match;             /* 0-based index of next match to process */
-       /* workspace for build_regexp_matches_result() */
+       /* workspace for build_regexp_match_result() */
         Datum      *elems;                      /* has npatterns elements */
         bool       *nulls;                      /* has npatterns elements */
  } regexp_matches_ctx;
@@ -107,13 +107,12 @@ static cached_re_str re_array[MAX_CACHED_RES];    /* cached re's */
  
  /* Local functions */
  static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
-                                        text *flags,
+                                        pg_re_flags *flags,
                                          Oid collation,
-                                        bool force_glob,
                                          bool use_subpatterns,
                                          bool ignore_degenerate);
  static void cleanup_regexp_matches(regexp_matches_ctx *matchctx);
-static ArrayType *build_regexp_matches_result(regexp_matches_ctx *matchctx);
+static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx);
  static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
  
  
@@ -350,7 +349,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
  
  
  /*
- * parse_re_flags - parse the options argument of regexp_matches and friends
+ * parse_re_flags - parse the options argument of regexp_match and friends
   *
   *     flags --- output argument, filled with desired options
   *     opts --- TEXT object, or NULL for defaults
@@ -840,9 +839,53 @@ similar_escape(PG_FUNCTION_ARGS)
         PG_RETURN_TEXT_P(result);
  }
  
+/*
+ * regexp_match()
+ *             Return a text array of the substring(s) captured by the first match of a pattern within a string, or SQL NULL if the pattern does not match.
+ */
+Datum
+regexp_match(PG_FUNCTION_ARGS)
+{
+       text       *orig_str = PG_GETARG_TEXT_PP(0);
+       text       *pattern = PG_GETARG_TEXT_PP(1);
+       text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+       pg_re_flags re_flags;
+       regexp_matches_ctx *matchctx;
+
+       /* Determine options; parse_re_flags accepts a NULL flags pointer as meaning "use the defaults" */
+       parse_re_flags(&re_flags, flags);
+       /* User mustn't specify 'g': only the first match is ever returned here, so point them at regexp_matches */
+       if (re_flags.glob)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("regexp_match does not support the global option"),
+                                errhint("Use the regexp_matches function instead.")));
+
+       matchctx = setup_regexp_matches(orig_str, pattern, &re_flags,
+                                                                       PG_GET_COLLATION(), true, false);
+
+       if (matchctx->nmatches == 0)
+               PG_RETURN_NULL();
+
+       Assert(matchctx->nmatches == 1);
+
+       /* Create workspace that build_regexp_match_result needs: the elems and nulls arrays, npatterns entries each */
+       matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
+       matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
+
+       PG_RETURN_DATUM(PointerGetDatum(build_regexp_match_result(matchctx)));
+}
+
+/* Two-argument entry point that simply forwards to regexp_match(); it is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_match_no_flags(PG_FUNCTION_ARGS)
+{
+       return regexp_match(fcinfo);
+}
+
  /*
   * regexp_matches()
- *             Return a table of matches of a pattern within a string.
+ *             Return a table of all matches of a pattern within a string.
   */
  Datum
  regexp_matches(PG_FUNCTION_ARGS)
@@ -854,18 +897,22 @@ regexp_matches(PG_FUNCTION_ARGS)
         {
                 text       *pattern = PG_GETARG_TEXT_PP(1);
                 text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+               pg_re_flags re_flags;
                 MemoryContext oldcontext;
  
                 funcctx = SRF_FIRSTCALL_INIT();
                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
  
+               /* Determine options */
+               parse_re_flags(&re_flags, flags);
+
                 /* be sure to copy the input string into the multi-call ctx */
                 matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
-                                                                               flags,
+                                                                               &re_flags,
                                                                                 PG_GET_COLLATION(),
-                                                                               false, true, false);
+                                                                               true, false);
  
-               /* Pre-create workspace that build_regexp_matches_result needs */
+               /* Pre-create workspace that build_regexp_match_result needs */
                 matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
                 matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
  
@@ -880,7 +927,7 @@ regexp_matches(PG_FUNCTION_ARGS)
         {
                 ArrayType  *result_ary;
  
-               result_ary = build_regexp_matches_result(matchctx);
+               result_ary = build_regexp_match_result(matchctx);
                 matchctx->next_match++;
                 SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
         }
@@ -899,28 +946,27 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
  }
  
  /*
- * setup_regexp_matches --- do the initial matching for regexp_matches()
- *             or regexp_split()
+ * setup_regexp_matches --- do the initial matching for regexp_match
+ *             and regexp_split functions
   *
   * To avoid having to re-find the compiled pattern on each call, we do
   * all the matching in one swoop.  The returned regexp_matches_ctx contains
   * the locations of all the substrings matching the pattern.
   *
- * The three bool parameters have only two patterns (one for each caller)
- * but it seems clearer to distinguish the functionality this way than to
- * key it all off one "is_split" flag.
+ * The two bool parameters have only two patterns (one for matching, one for
+ * splitting) but it seems clearer to distinguish the functionality this way
+ * than to key it all off one "is_split" flag.
   */
  static regexp_matches_ctx *
-setup_regexp_matches(text *orig_str, text *pattern, text *flags,
+setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
                                          Oid collation,
-                                        bool force_glob, bool use_subpatterns,
+                                        bool use_subpatterns,
                                          bool ignore_degenerate)
  {
         regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx));
         int                     orig_len;
         pg_wchar   *wide_str;
         int                     wide_len;
-       pg_re_flags re_flags;
         regex_t    *cpattern;
         regmatch_t *pmatch;
         int                     pmatch_len;
@@ -937,21 +983,8 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
         wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
         wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
  
-       /* determine options */
-       parse_re_flags(&re_flags, flags);
-       if (force_glob)
-       {
-               /* user mustn't specify 'g' for regexp_split */
-               if (re_flags.glob)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                errmsg("regexp_split does not support the global option")));
-               /* but we find all the matches anyway */
-               re_flags.glob = true;
-       }
-
         /* set up the compiled pattern */
-       cpattern = RE_compile_and_cache(pattern, re_flags.cflags, collation);
+       cpattern = RE_compile_and_cache(pattern, re_flags->cflags, collation);
  
         /* do we want to remember subpatterns? */
         if (use_subpatterns && cpattern->re_nsub > 0)
@@ -970,7 +1003,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
         pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
  
         /* the real output space (grown dynamically if needed) */
-       array_len = re_flags.glob ? 256 : 32;
+       array_len = re_flags->glob ? 256 : 32;
         matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
         array_idx = 0;
  
@@ -1018,7 +1051,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
                 prev_match_end = pmatch[0].rm_eo;
  
                 /* if not glob, stop after one match */
-               if (!re_flags.glob)
+               if (!re_flags->glob)
                         break;
  
                 /*
@@ -1057,10 +1090,10 @@ cleanup_regexp_matches(regexp_matches_ctx *matchctx)
  }
  
  /*
- * build_regexp_matches_result - build output array for current match
+ * build_regexp_match_result - build output array for current match
   */
  static ArrayType *
-build_regexp_matches_result(regexp_matches_ctx *matchctx)
+build_regexp_match_result(regexp_matches_ctx *matchctx)
  {
         Datum      *elems = matchctx->elems;
         bool       *nulls = matchctx->nulls;
@@ -1114,16 +1147,27 @@ regexp_split_to_table(PG_FUNCTION_ARGS)
         {
                 text       *pattern = PG_GETARG_TEXT_PP(1);
                 text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+               pg_re_flags re_flags;
                 MemoryContext oldcontext;
  
                 funcctx = SRF_FIRSTCALL_INIT();
                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
  
+               /* Determine options */
+               parse_re_flags(&re_flags, flags);
+               /* User mustn't specify 'g' */
+               if (re_flags.glob)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("regexp_split_to_table does not support the global option")));
+               /* But we find all the matches anyway */
+               re_flags.glob = true;
+
                 /* be sure to copy the input string into the multi-call ctx */
                 splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
-                                                                               flags,
+                                                                               &re_flags,
                                                                                 PG_GET_COLLATION(),
-                                                                               true, false, true);
+                                                                               false, true);
  
                 MemoryContextSwitchTo(oldcontext);
                 funcctx->user_fctx = (void *) splitctx;
@@ -1162,13 +1206,24 @@ Datum
  regexp_split_to_array(PG_FUNCTION_ARGS)
  {
         ArrayBuildState *astate = NULL;
+       pg_re_flags re_flags;
         regexp_matches_ctx *splitctx;
  
+       /* Determine options */
+       parse_re_flags(&re_flags, PG_GETARG_TEXT_PP_IF_EXISTS(2));
+       /* User mustn't specify 'g' */
+       if (re_flags.glob)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+               errmsg("regexp_split_to_array does not support the global option")));
+       /* But we find all the matches anyway */
+       re_flags.glob = true;
+
         splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
                                                                         PG_GETARG_TEXT_PP(1),
-                                                                       PG_GETARG_TEXT_PP_IF_EXISTS(2),
+                                                                       &re_flags,
                                                                         PG_GET_COLLATION(),
-                                                                       true, false, true);
+                                                                       false, true);
  
         while (splitctx->next_match <= splitctx->nmatches)
         {
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 82810c8fbaeadd847961e2ec65f7e8cac25c78a0..fb356bf3cd893972f4a63269839998eff65d35d9 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201608161
+#define CATALOG_VERSION_NO     201608171
  
  #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index af19c1a82b6c7e42378dc354fd753fbb15cb7870..6fed7a0d19897940292e66971c53695369b1f68e 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -1912,10 +1912,14 @@ DATA(insert OID =  2284 ( regexp_replace           PGNSP PGUID 12 1 0 0 0 f f f f t f i
  DESCR("replace text using regexp");
  DATA(insert OID =  2285 ( regexp_replace          PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 25 "25 25 25 25" _null_ _null_ _null_ _null_ _null_ textregexreplace _null_ _null_ _null_ ));
  DESCR("replace text using regexp");
+DATA(insert OID =  3396 ( regexp_match    PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_match_no_flags _null_ _null_ _null_ ));
+DESCR("find first match for regexp");
+DATA(insert OID =  3397 ( regexp_match    PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_match _null_ _null_ _null_ ));
+DESCR("find first match for regexp");
  DATA(insert OID =  2763 ( regexp_matches   PGNSP PGUID 12 1 1 0 0 f f f f t t i s 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches_no_flags _null_ _null_ _null_ ));
-DESCR("find all match groups for regexp");
+DESCR("find match(es) for regexp");
  DATA(insert OID =  2764 ( regexp_matches   PGNSP PGUID 12 1 10 0 0 f f f f t t i s 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches _null_ _null_ _null_ ));
-DESCR("find all match groups for regexp");
+DESCR("find match(es) for regexp");
  DATA(insert OID =  2088 ( split_part   PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 25 "25 25 23" _null_ _null_ _null_ _null_ _null_     split_text _null_ _null_ _null_ ));
  DESCR("split string by field_sep and return field_num");
  DATA(insert OID =  2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 2 0 25 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_table_no_flags _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h

index a91be981b9873bd114c16d1459b7df7be02c663e..40e25c8824719f3da8ff71173535ab4cb7aba7c2 100644 (file)
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -628,6 +628,8 @@ extern Datum textregexsubstr(PG_FUNCTION_ARGS);
  extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS);
  extern Datum textregexreplace(PG_FUNCTION_ARGS);
  extern Datum similar_escape(PG_FUNCTION_ARGS);
+extern Datum regexp_match(PG_FUNCTION_ARGS);
+extern Datum regexp_match_no_flags(PG_FUNCTION_ARGS);
  extern Datum regexp_matches(PG_FUNCTION_ARGS);
  extern Datum regexp_matches_no_flags(PG_FUNCTION_ARGS);
  extern Datum regexp_split_to_table(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out

index af097193c52dee740033f31db4e8a76b79af8a55..79a7fa7a845ec634554207f05bf15f3be2167ce8 100644 (file)
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -90,6 +90,34 @@ select substring('a' from '((a)+)');
   a
  (1 row)
  
+-- Test regexp_match()
+select regexp_match('abc', '');
+ regexp_match 
+--------------
+ {""}
+(1 row)
+
+select regexp_match('abc', 'bc');
+ regexp_match 
+--------------
+ {bc}
+(1 row)
+
+select regexp_match('abc', 'd') is null;
+ ?column? 
+----------
+ t
+(1 row)
+
+select regexp_match('abc', '(B)(c)', 'i');
+ regexp_match 
+--------------
+ {b,c}
+(1 row)
+
+select regexp_match('abc', 'Bd', 'ig'); -- error
+ERROR:  regexp_match does not support the global option
+HINT:  Use the regexp_matches function instead.
  -- Test lookahead constraints
  select regexp_matches('ab', 'a(?=b)b*');
   regexp_matches 
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out

index 19708c32fdd30b9566050b55ef82843b72989923..35cadb24aa1f61d91185d0bc9a0eda2ac59d8d4b 100644 (file)
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -681,9 +681,9 @@ SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e',
  ERROR:  invalid regexp option: "z"
  -- global option meaningless for regexp_split
  SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g') AS foo;
-ERROR:  regexp_split does not support the global option
+ERROR:  regexp_split_to_table does not support the global option
  SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g');
-ERROR:  regexp_split does not support the global option
+ERROR:  regexp_split_to_array does not support the global option
  -- change NULL-display back
  \pset null ''
  -- E021-11 position expression
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql

index 1028ca6dcdcd0e8cae17c13ce4281af678c45708..1361b625707608e10affbdbb22d7e5305b2b08ee 100644 (file)
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -25,6 +25,13 @@ select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
  select substring('a' from '((a))+');
  select substring('a' from '((a)+)');
  
+-- Test regexp_match()
+select regexp_match('abc', '');
+select regexp_match('abc', 'bc');
+select regexp_match('abc', 'd') is null;
+select regexp_match('abc', '(B)(c)', 'i');
+select regexp_match('abc', 'Bd', 'ig'); -- error
+
  -- Test lookahead constraints
  select regexp_matches('ab', 'a(?=b)b*');
  select regexp_matches('a', 'a(?=b)b*');
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 17 Aug 2016 22:32:56 +0000 (18:32 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 17 Aug 2016 22:33:01 +0000 (18:33 -0400)
doc/src/sgml/func.sgml		patch \| blob \| blame \| history
src/backend/catalog/information_schema.sql		patch \| blob \| blame \| history
src/backend/utils/adt/regexp.c		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/pg_proc.h		patch \| blob \| blame \| history
src/include/utils/builtins.h		patch \| blob \| blame \| history
src/test/regress/expected/regex.out		patch \| blob \| blame \| history
src/test/regress/expected/strings.out		patch \| blob \| blame \| history
src/test/regress/sql/regex.sql		patch \| blob \| blame \| history