Repair bug in regexp split performance improvements.

author Andrew Gierth <rhodiumtoad@postgresql.org>

Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)

committer Andrew Gierth <rhodiumtoad@postgresql.org>

Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)
author Andrew Gierth <rhodiumtoad@postgresql.org>
Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)
committer Andrew Gierth <rhodiumtoad@postgresql.org>
Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c

index d8b692123421b5e46e2b5c48f6e832b64a5def9e..171fcc8a448cc7b1e4f42b843821cef94dbfc5af 100644 (file)
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -982,6 +982,7 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
     int         array_len;
     int         array_idx;
     int         prev_match_end;
+   int         prev_valid_match_end;
     int         start_search;
     int         maxlen = 0;     /* largest fetch length in characters */
  
@@ -1024,6 +1025,7 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
  
     /* search for the pattern, perhaps repeatedly */
     prev_match_end = 0;
+   prev_valid_match_end = 0;
     start_search = 0;
     while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
                             pmatch_len, pmatch))
@@ -1076,13 +1078,15 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
             matchctx->nmatches++;
  
             /*
-            * check length of unmatched portion between end of previous match
-            * and start of current one
+            * check length of unmatched portion between end of previous valid
+            * (nondegenerate, or degenerate but not ignored) match and start
+            * of current one
              */
             if (fetching_unmatched &&
                 pmatch[0].rm_so >= 0 &&
-               (pmatch[0].rm_so - prev_match_end) > maxlen)
-               maxlen = (pmatch[0].rm_so - prev_match_end);
+               (pmatch[0].rm_so - prev_valid_match_end) > maxlen)
+               maxlen = (pmatch[0].rm_so - prev_valid_match_end);
+           prev_valid_match_end = pmatch[0].rm_eo;
         }
         prev_match_end = pmatch[0].rm_eo;
  
@@ -1108,8 +1112,8 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
      * input string
      */
     if (fetching_unmatched &&
-       (wide_len - prev_match_end) > maxlen)
-       maxlen = (wide_len - prev_match_end);
+       (wide_len - prev_valid_match_end) > maxlen)
+       maxlen = (wide_len - prev_valid_match_end);
  
     /*
      * Keep a note of the end position of the string for the benefit of
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out

index cbe66c375ca179a9bec2b55ce27b03512a2c0415..189bdffdca084cbf126620ebc3e9428485ff68f2 100644 (file)
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -674,6 +674,24 @@ SELECT regexp_split_to_array('123456','.');
   {"","","","","","",""}
  (1 row)
  
+SELECT regexp_split_to_array('123456','');
+ regexp_split_to_array 
+-----------------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT regexp_split_to_array('123456','(?:)');
+ regexp_split_to_array 
+-----------------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT regexp_split_to_array('1','');
+ regexp_split_to_array 
+-----------------------
+ {1}
+(1 row)
+
  -- errors
  SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
  ERROR:  invalid regexp option: "z"
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql

index 5a82237870e205f55a6e7ae7c869195758f82f5c..f2203ef1b1d23af8b5d797f1c6dcbb82d7113c1b 100644 (file)
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -188,6 +188,9 @@ SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 'nom
  SELECT regexp_split_to_array('123456','1');
  SELECT regexp_split_to_array('123456','6');
  SELECT regexp_split_to_array('123456','.');
+SELECT regexp_split_to_array('123456','');
+SELECT regexp_split_to_array('123456','(?:)');
+SELECT regexp_split_to_array('1','');
  -- errors
  SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
  SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'iz');
author	Andrew Gierth <rhodiumtoad@postgresql.org>
	Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)
committer	Andrew Gierth <rhodiumtoad@postgresql.org>
	Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)
src/backend/utils/adt/regexp.c		patch | blob | blame | history
src/test/regress/expected/strings.out		patch | blob | blame | history
src/test/regress/sql/strings.sql		patch | blob | blame | history