diff options
| author | Michael Paquier | 2025-05-27 23:58:40 +0000 |
|---|---|---|
| committer | Michael Paquier | 2025-05-27 23:58:40 +0000 |
| commit | d46911e584d48ee01d5bf75699a77fbf635c865d (patch) | |
| tree | 3e7e39095ad21a728f7bb0667fe48c1249d3c05b /src/test | |
| parent | 3e782ca32225e5d5219251a5a3c06698c1e824f8 (diff) | |
Fix conversion of SIMILAR TO regexes for character classes
The code that translates SIMILAR TO pattern matching expressions to
POSIX-style regular expressions did not consider that square brackets
can be nested. For example, in an expression like [[:alpha:]%_], the
logic replaced the placeholders '_' and '%' but it should not.
This commit fixes the conversion logic by tracking the nesting level of
square brackets marking character class areas, while considering that
in expressions like []] or [^]] the first closing square bracket is a
regular character. Multiple tests are added to show how the conversions
should or should not apply applied while in a character class area, with
specific cases added for all the characters converted outside character
classes like an opening parenthesis '(', dollar sign '$', etc.
Author: Laurenz Albe <laurenz.albe@cybertec.at>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/16ab039d1af455652bdf4173402ddda145f2c73b.camel@cybertec.at
Backpatch-through: 13
Diffstat (limited to 'src/test')
| -rw-r--r-- | src/test/regress/expected/strings.out | 62 | ||||
| -rw-r--r-- | src/test/regress/sql/strings.sql | 20 |
2 files changed, 82 insertions, 0 deletions
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 174f0a68331..9c0aba5c029 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -614,6 +614,68 @@ SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null; SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error; ERROR: invalid escape string HINT: Escape string must be empty or one character. +-- Characters that should be left alone in character classes when a +-- SIMILAR TO regexp pattern is converted to POSIX style. +-- Underscore "_" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_'; + QUERY PLAN +------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:.[_[:alpha:]_].)$'::text) +(2 rows) + +-- Percentage "%" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%'; + QUERY PLAN +-------------------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:.*[%[:alnum:]%].*)$'::text) +(2 rows) + +-- Dot "." +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].'; + QUERY PLAN +-------------------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:\.[.[:alnum:].]\.)$'::text) +(2 rows) + +-- Dollar "$" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$'; + QUERY PLAN +-------------------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:\$[$[:alnum:]$]\$)$'::text) +(2 rows) + +-- Opening parenthesis "(" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](]('; +ERROR: invalid regular expression: parentheses () not balanced +-- Caret "^" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:\^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]\^)$'::text) +(2 rows) + +-- Closing square bracket "]" at the beginning of character class +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%'; + QUERY PLAN +------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:[]%][^]%][^%].*)$'::text) +(2 rows) + +-- Closing square bracket effective after two carets at the beginning +-- of character class. +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^'; + QUERY PLAN +--------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:[^^]\^)$'::text) +(2 rows) + -- Test backslash escapes in regexp_replace's replacement string SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3'); regexp_replace diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index f7b325baadf..2a4fa534997 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -197,6 +197,26 @@ SELECT 'abcd\efg' SIMILAR TO '_bcd\%' ESCAPE '' AS true; SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null; SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error; +-- Characters that should be left alone in character classes when a +-- SIMILAR TO regexp pattern is converted to POSIX style. +-- Underscore "_" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_'; +-- Percentage "%" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%'; +-- Dot "." +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].'; +-- Dollar "$" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$'; +-- Opening parenthesis "(" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](]('; +-- Caret "^" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^'; +-- Closing square bracket "]" at the beginning of character class +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%'; +-- Closing square bracket effective after two carets at the beginning +-- of character class. +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^'; + -- Test backslash escapes in regexp_replace's replacement string SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3'); SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\&Y', 'g'); |
