summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
authorMichael Paquier2025-05-27 23:58:40 +0000
committerMichael Paquier2025-05-27 23:58:40 +0000
commitd46911e584d48ee01d5bf75699a77fbf635c865d (patch)
tree3e7e39095ad21a728f7bb0667fe48c1249d3c05b /src/test
parent3e782ca32225e5d5219251a5a3c06698c1e824f8 (diff)
Fix conversion of SIMILAR TO regexes for character classes
The code that translates SIMILAR TO pattern matching expressions to POSIX-style regular expressions did not consider that square brackets can be nested. For example, in an expression like [[:alpha:]%_], the logic replaced the placeholders '_' and '%' but it should not. This commit fixes the conversion logic by tracking the nesting level of square brackets marking character class areas, while considering that in expressions like []] or [^]] the first closing square bracket is a regular character. Multiple tests are added to show how the conversions should or should not apply applied while in a character class area, with specific cases added for all the characters converted outside character classes like an opening parenthesis '(', dollar sign '$', etc. Author: Laurenz Albe <laurenz.albe@cybertec.at> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Michael Paquier <michael@paquier.xyz> Discussion: https://postgr.es/m/16ab039d1af455652bdf4173402ddda145f2c73b.camel@cybertec.at Backpatch-through: 13
Diffstat (limited to 'src/test')
-rw-r--r--src/test/regress/expected/strings.out62
-rw-r--r--src/test/regress/sql/strings.sql20
2 files changed, 82 insertions, 0 deletions
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 174f0a68331..9c0aba5c029 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -614,6 +614,68 @@ SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null;
SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error;
ERROR: invalid escape string
HINT: Escape string must be empty or one character.
+-- Characters that should be left alone in character classes when a
+-- SIMILAR TO regexp pattern is converted to POSIX style.
+-- Underscore "_"
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_';
+ QUERY PLAN
+------------------------------------------------
+ Seq Scan on text_tbl
+ Filter: (f1 ~ '^(?:.[_[:alpha:]_].)$'::text)
+(2 rows)
+
+-- Percentage "%"
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%';
+ QUERY PLAN
+--------------------------------------------------
+ Seq Scan on text_tbl
+ Filter: (f1 ~ '^(?:.*[%[:alnum:]%].*)$'::text)
+(2 rows)
+
+-- Dot "."
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].';
+ QUERY PLAN
+--------------------------------------------------
+ Seq Scan on text_tbl
+ Filter: (f1 ~ '^(?:\.[.[:alnum:].]\.)$'::text)
+(2 rows)
+
+-- Dollar "$"
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$';
+ QUERY PLAN
+--------------------------------------------------
+ Seq Scan on text_tbl
+ Filter: (f1 ~ '^(?:\$[$[:alnum:]$]\$)$'::text)
+(2 rows)
+
+-- Opening parenthesis "("
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](](';
+ERROR: invalid regular expression: parentheses () not balanced
+-- Caret "^"
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^';
+ QUERY PLAN
+------------------------------------------------------------------------
+ Seq Scan on text_tbl
+ Filter: (f1 ~ '^(?:\^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]\^)$'::text)
+(2 rows)
+
+-- Closing square bracket "]" at the beginning of character class
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%';
+ QUERY PLAN
+------------------------------------------------
+ Seq Scan on text_tbl
+ Filter: (f1 ~ '^(?:[]%][^]%][^%].*)$'::text)
+(2 rows)
+
+-- Closing square bracket effective after two carets at the beginning
+-- of character class.
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^';
+ QUERY PLAN
+---------------------------------------
+ Seq Scan on text_tbl
+ Filter: (f1 ~ '^(?:[^^]\^)$'::text)
+(2 rows)
+
-- Test backslash escapes in regexp_replace's replacement string
SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
regexp_replace
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index f7b325baadf..2a4fa534997 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -197,6 +197,26 @@ SELECT 'abcd\efg' SIMILAR TO '_bcd\%' ESCAPE '' AS true;
SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null;
SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error;
+-- Characters that should be left alone in character classes when a
+-- SIMILAR TO regexp pattern is converted to POSIX style.
+-- Underscore "_"
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_';
+-- Percentage "%"
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%';
+-- Dot "."
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].';
+-- Dollar "$"
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$';
+-- Opening parenthesis "("
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '([([:alnum:](](';
+-- Caret "^"
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^';
+-- Closing square bracket "]" at the beginning of character class
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%';
+-- Closing square bracket effective after two carets at the beginning
+-- of character class.
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^';
+
-- Test backslash escapes in regexp_replace's replacement string
SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\&Y', 'g');