summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
authorNeil Conway2007-03-20 05:45:00 +0000
committerNeil Conway2007-03-20 05:45:00 +0000
commit9eb78beeae01f2f0ccafc5d66a2003ea7e3952f9 (patch)
tree8e4139ea736ab91319fff21b8654ef4d9150ac55 /src/test
parent5e96b04a7cb1f35aa1b75680f8bfbeac7cedc178 (diff)
Add three new regexp functions: regexp_matches, regexp_split_to_array,
and regexp_split_to_table. These functions provide access to the capture groups resulting from a POSIX regular expression match, and provide the ability to split a string on a POSIX regular expression, respectively. Patch from Jeremy Drake; code review by Neil Conway, additional comments and suggestions from Tom and Peter E. This patch bumps the catversion, adds some regression tests, and updates the docs.
Diffstat (limited to 'src/test')
-rw-r--r--src/test/regress/expected/strings.out227
-rw-r--r--src/test/regress/sql/strings.sql53
2 files changed, 277 insertions, 3 deletions
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 20081f699b..e11dfc35d7 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -217,9 +217,232 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
Z Z
(1 row)
--- invalid option of REGEXP_REPLACE
+-- invalid regexp option
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
-ERROR: invalid option of regexp_replace: z
+ERROR: invalid regexp option: z
+-- set so we can tell NULL from empty string
+\pset null '\\N'
+-- return all matches from regexp
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+ regexp_matches
+----------------
+ {bar,beque}
+(1 row)
+
+-- test case insensitive
+SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
+ regexp_matches
+----------------
+ {bAR,bEqUE}
+(1 row)
+
+-- global option - more than one match
+SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
+ regexp_matches
+----------------
+ {bar,beque}
+ {bazil,barf}
+(2 rows)
+
+-- empty capture group (matched empty string)
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
+ regexp_matches
+----------------
+ {bar,"",beque}
+(1 row)
+
+-- no match
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
+ regexp_matches
+----------------
+(0 rows)
+
+-- optional capture group did not match, null entry in array
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
+ regexp_matches
+------------------
+ {bar,NULL,beque}
+(1 row)
+
+-- no capture groups
+SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
+ regexp_matches
+----------------
+ {barbeque}
+(1 row)
+
+-- give me errors
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper');
+ERROR: invalid regexp option: z
+SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
+ERROR: invalid regular expression: parentheses () not balanced
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$);
+ERROR: invalid regular expression: invalid repetition count(s)
+-- split string on regexp
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
+ foo | length
+--------+--------
+ the | 3
+ quick | 5
+ brown | 5
+ fox | 3
+ jumped | 6
+ over | 4
+ the | 3
+ lazy | 4
+ dog | 3
+(9 rows)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$);
+ regexp_split_to_array
+------------------------------------------------
+ {the,quick,brown,fox,jumped,over,the,lazy,dog}
+(1 row)
+
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo;
+ foo | length
+-----+--------
+ t | 1
+ h | 1
+ e | 1
+ q | 1
+ u | 1
+ i | 1
+ c | 1
+ k | 1
+ b | 1
+ r | 1
+ o | 1
+ w | 1
+ n | 1
+ f | 1
+ o | 1
+ x | 1
+ j | 1
+ u | 1
+ m | 1
+ p | 1
+ e | 1
+ d | 1
+ o | 1
+ v | 1
+ e | 1
+ r | 1
+ t | 1
+ h | 1
+ e | 1
+ l | 1
+ a | 1
+ z | 1
+ y | 1
+ d | 1
+ o | 1
+ g | 1
+(36 rows)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$);
+ regexp_split_to_array
+---------------------------------------------------------------------------
+ {t,h,e,q,u,i,c,k,b,r,o,w,n,f,o,x,j,u,m,p,e,d,o,v,e,r,t,h,e,l,a,z,y,d,o,g}
+(1 row)
+
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', '') AS foo;
+ foo | length
+-----+--------
+ t | 1
+ h | 1
+ e | 1
+ | 1
+ q | 1
+ u | 1
+ i | 1
+ c | 1
+ k | 1
+ | 1
+ b | 1
+ r | 1
+ o | 1
+ w | 1
+ n | 1
+ | 1
+ f | 1
+ o | 1
+ x | 1
+ | 1
+ j | 1
+ u | 1
+ m | 1
+ p | 1
+ e | 1
+ d | 1
+ | 1
+ o | 1
+ v | 1
+ e | 1
+ r | 1
+ | 1
+ t | 1
+ h | 1
+ e | 1
+ | 1
+ l | 1
+ a | 1
+ z | 1
+ y | 1
+ | 1
+ d | 1
+ o | 1
+ g | 1
+(44 rows)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', '');
+ regexp_split_to_array
+-----------------------------------------------------------------------------------------------------------
+ {t,h,e," ",q,u,i,c,k," ",b,r,o,w,n," ",f,o,x," ",j,u,m,p,e,d," ",o,v,e,r," ",t,h,e," ",l,a,z,y," ",d,o,g}
+(1 row)
+
+-- case insensitive
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo;
+ foo | length
+-----------------------+--------
+ th | 2
+ QUick bROWn FOx jUMP | 21
+ d ov | 4
+ r TH | 4
+ lazy dOG | 9
+(5 rows)
+
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i');
+ regexp_split_to_array
+--------------------------------------------------------
+ {th," QUick bROWn FOx jUMP","d ov","r TH"," lazy dOG"}
+(1 row)
+
+-- no match of pattern
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo;
+ foo | length
+----------------------------------------------+--------
+ the quick brown fox jumped over the lazy dog | 44
+(1 row)
+
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', 'nomatch');
+ regexp_split_to_array
+--------------------------------------------------
+ {"the quick brown fox jumped over the lazy dog"}
+(1 row)
+
+-- errors
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo;
+ERROR: invalid regexp option: z
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy');
+ERROR: invalid regexp option: z
+-- global option meaningless for regexp_split
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo;
+ERROR: regexp_split does not support the global option
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g');
+ERROR: regexp_split does not support the global option
+-- change NULL-display back
+\pset null ''
-- E021-11 position expression
SELECT POSITION('4' IN '1234567890') = '4' AS "4";
4
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 570d9a27f4..389ff63517 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -85,9 +85,60 @@ SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3
SELECT regexp_replace('AAA BBB CCC ', E'\\s+', ' ', 'g');
SELECT regexp_replace('AAA', '^|$', 'Z', 'g');
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
--- invalid option of REGEXP_REPLACE
+-- invalid regexp option
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
+-- set so we can tell NULL from empty string
+\pset null '\\N'
+
+-- return all matches from regexp
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$);
+
+-- test case insensitive
+SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i');
+
+-- global option - more than one match
+SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g');
+
+-- empty capture group (matched empty string)
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$);
+-- no match
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$);
+-- optional capture group did not match, null entry in array
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
+
+-- no capture groups
+SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
+
+-- give me errors
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper');
+SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
+SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$);
+
+-- split string on regexp
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo;
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$);
+
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo;
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$);
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', '') AS foo;
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', '');
+-- case insensitive
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo;
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i');
+-- no match of pattern
+SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo;
+SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', 'nomatch');
+-- errors
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo;
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy');
+-- global option meaningless for regexp_split
+SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo;
+SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g');
+
+-- change NULL-display back
+\pset null ''
+
-- E021-11 position expression
SELECT POSITION('4' IN '1234567890') = '4' AS "4";