diff options
| author | Neil Conway | 2007-03-20 05:45:00 +0000 |
|---|---|---|
| committer | Neil Conway | 2007-03-20 05:45:00 +0000 |
| commit | 9eb78beeae01f2f0ccafc5d66a2003ea7e3952f9 (patch) | |
| tree | 8e4139ea736ab91319fff21b8654ef4d9150ac55 /src/test | |
| parent | 5e96b04a7cb1f35aa1b75680f8bfbeac7cedc178 (diff) | |
Add three new regexp functions: regexp_matches, regexp_split_to_array,
and regexp_split_to_table. These functions provide access to the
capture groups resulting from a POSIX regular expression match,
and provide the ability to split a string on a POSIX regular
expression, respectively. Patch from Jeremy Drake; code review by
Neil Conway, additional comments and suggestions from Tom and
Peter E.
This patch bumps the catversion, adds some regression tests,
and updates the docs.
Diffstat (limited to 'src/test')
| -rw-r--r-- | src/test/regress/expected/strings.out | 227 | ||||
| -rw-r--r-- | src/test/regress/sql/strings.sql | 53 |
2 files changed, 277 insertions, 3 deletions
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 20081f699b..e11dfc35d7 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -217,9 +217,232 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi'); Z Z (1 row) --- invalid option of REGEXP_REPLACE +-- invalid regexp option SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z'); -ERROR: invalid option of regexp_replace: z +ERROR: invalid regexp option: z +-- set so we can tell NULL from empty string +\pset null '\\N' +-- return all matches from regexp +SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$); + regexp_matches +---------------- + {bar,beque} +(1 row) + +-- test case insensitive +SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i'); + regexp_matches +---------------- + {bAR,bEqUE} +(1 row) + +-- global option - more than one match +SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g'); + regexp_matches +---------------- + {bar,beque} + {bazil,barf} +(2 rows) + +-- empty capture group (matched empty string) +SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$); + regexp_matches +---------------- + {bar,"",beque} +(1 row) + +-- no match +SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$); + regexp_matches +---------------- +(0 rows) + +-- optional capture group did not match, null entry in array +SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$); + regexp_matches +------------------ + {bar,NULL,beque} +(1 row) + +-- no capture groups +SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$); + regexp_matches +---------------- + {barbeque} +(1 row) + +-- give me errors +SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper'); +ERROR: invalid regexp option: z +SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$); +ERROR: invalid regular expression: parentheses () not balanced +SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$); +ERROR: invalid regular expression: invalid repetition count(s) +-- split string on regexp +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo; + foo | length +--------+-------- + the | 3 + quick | 5 + brown | 5 + fox | 3 + jumped | 6 + over | 4 + the | 3 + lazy | 4 + dog | 3 +(9 rows) + +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$); + regexp_split_to_array +------------------------------------------------ + {the,quick,brown,fox,jumped,over,the,lazy,dog} +(1 row) + +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo; + foo | length +-----+-------- + t | 1 + h | 1 + e | 1 + q | 1 + u | 1 + i | 1 + c | 1 + k | 1 + b | 1 + r | 1 + o | 1 + w | 1 + n | 1 + f | 1 + o | 1 + x | 1 + j | 1 + u | 1 + m | 1 + p | 1 + e | 1 + d | 1 + o | 1 + v | 1 + e | 1 + r | 1 + t | 1 + h | 1 + e | 1 + l | 1 + a | 1 + z | 1 + y | 1 + d | 1 + o | 1 + g | 1 +(36 rows) + +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$); + regexp_split_to_array +--------------------------------------------------------------------------- + {t,h,e,q,u,i,c,k,b,r,o,w,n,f,o,x,j,u,m,p,e,d,o,v,e,r,t,h,e,l,a,z,y,d,o,g} +(1 row) + +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', '') AS foo; + foo | length +-----+-------- + t | 1 + h | 1 + e | 1 + | 1 + q | 1 + u | 1 + i | 1 + c | 1 + k | 1 + | 1 + b | 1 + r | 1 + o | 1 + w | 1 + n | 1 + | 1 + f | 1 + o | 1 + x | 1 + | 1 + j | 1 + u | 1 + m | 1 + p | 1 + e | 1 + d | 1 + | 1 + o | 1 + v | 1 + e | 1 + r | 1 + | 1 + t | 1 + h | 1 + e | 1 + | 1 + l | 1 + a | 1 + z | 1 + y | 1 + | 1 + d | 1 + o | 1 + g | 1 +(44 rows) + +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', ''); + regexp_split_to_array +----------------------------------------------------------------------------------------------------------- + {t,h,e," ",q,u,i,c,k," ",b,r,o,w,n," ",f,o,x," ",j,u,m,p,e,d," ",o,v,e,r," ",t,h,e," ",l,a,z,y," ",d,o,g} +(1 row) + +-- case insensitive +SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo; + foo | length +-----------------------+-------- + th | 2 + QUick bROWn FOx jUMP | 21 + d ov | 4 + r TH | 4 + lazy dOG | 9 +(5 rows) + +SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i'); + regexp_split_to_array +-------------------------------------------------------- + {th," QUick bROWn FOx jUMP","d ov","r TH"," lazy dOG"} +(1 row) + +-- no match of pattern +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo; + foo | length +----------------------------------------------+-------- + the quick brown fox jumped over the lazy dog | 44 +(1 row) + +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', 'nomatch'); + regexp_split_to_array +-------------------------------------------------- + {"the quick brown fox jumped over the lazy dog"} +(1 row) + +-- errors +SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo; +ERROR: invalid regexp option: z +SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy'); +ERROR: invalid regexp option: z +-- global option meaningless for regexp_split +SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo; +ERROR: regexp_split does not support the global option +SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g'); +ERROR: regexp_split does not support the global option +-- change NULL-display back +\pset null '' -- E021-11 position expression SELECT POSITION('4' IN '1234567890') = '4' AS "4"; 4 diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 570d9a27f4..389ff63517 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -85,9 +85,60 @@ SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3 SELECT regexp_replace('AAA BBB CCC ', E'\\s+', ' ', 'g'); SELECT regexp_replace('AAA', '^|$', 'Z', 'g'); SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi'); --- invalid option of REGEXP_REPLACE +-- invalid regexp option SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z'); +-- set so we can tell NULL from empty string +\pset null '\\N' + +-- return all matches from regexp +SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$); + +-- test case insensitive +SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i'); + +-- global option - more than one match +SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g'); + +-- empty capture group (matched empty string) +SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$); +-- no match +SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$); +-- optional capture group did not match, null entry in array +SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$); + +-- no capture groups +SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$); + +-- give me errors +SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper'); +SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$); +SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$); + +-- split string on regexp +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo; +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$); + +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo; +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$); +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', '') AS foo; +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', ''); +-- case insensitive +SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo; +SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i'); +-- no match of pattern +SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo; +SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', 'nomatch'); +-- errors +SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo; +SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy'); +-- global option meaningless for regexp_split +SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo; +SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g'); + +-- change NULL-display back +\pset null '' + -- E021-11 position expression SELECT POSITION('4' IN '1234567890') = '4' AS "4"; |
