From 628cbb50ba80c83917b07a7609ddec12cda172d0 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 10 Jul 2012 14:54:37 -0400 Subject: Re-implement extraction of fixed prefixes from regular expressions. To generate btree-indexable conditions from regex WHERE conditions (such as WHERE indexed_col ~ '^foo'), we need to be able to identify any fixed prefix that a regex might have; that is, find any string that must be a prefix of all strings satisfying the regex. We used to do that with entirely ad-hoc code that looked at the source text of the regex. It didn't know very much about regex syntax, which mostly meant that it would fail to identify some optimizable cases; but Viktor Rosenfeld reported that it would produce actively wrong answers for quantified parenthesized subexpressions, such as '^(foo)?bar'. Rather than trying to extend the ad-hoc code to cover this, let's get rid of it altogether in favor of identifying prefixes by examining the compiled form of a regex. To do this, I've added a new entry point "pg_regprefix" to the regex library; hopefully it is defined in a sufficiently general fashion that it can remain in the library when/if that code gets split out as a standalone project. Since this bug has been there for a very long time, this fix needs to get back-patched. However it depends on some other recent commits (particularly the addition of wchar-to-database-encoding conversion), so I'll commit this separately and then go to work on back-porting the necessary fixes. --- src/test/regress/expected/regex.out | 63 +++++++++++++++++++++++++++++++++++++ src/test/regress/sql/regex.sql | 10 ++++++ 2 files changed, 73 insertions(+) (limited to 'src/test') diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index dc0c713b408..658538fd419 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -90,3 +90,66 @@ select substring('a' from '((a)+)'); a (1 row) +-- Test conversion of regex patterns to indexable conditions +explain (costs off) select * from pg_proc where proname ~ 'abc'; + QUERY PLAN +----------------------------------- + Seq Scan on pg_proc + Filter: (proname ~ 'abc'::text) +(2 rows) + +explain (costs off) select * from pg_proc where proname ~ '^abc'; + QUERY PLAN +---------------------------------------------------------------------- + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name)) + Filter: (proname ~ '^abc'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^abc$'; + QUERY PLAN +------------------------------------------------------------ + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: (proname = 'abc'::name) + Filter: (proname ~ '^abc$'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^abcd*e'; + QUERY PLAN +---------------------------------------------------------------------- + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name)) + Filter: (proname ~ '^abcd*e'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^abc+d'; + QUERY PLAN +---------------------------------------------------------------------- + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name)) + Filter: (proname ~ '^abc+d'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)'; + QUERY PLAN +---------------------------------------------------------------------------- + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abcdef'::name) AND (proname < 'abcdeg'::name)) + Filter: (proname ~ '^(abc)(def)'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^(abc)$'; + QUERY PLAN +------------------------------------------------------------ + Index Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: (proname = 'abc'::name) + Filter: (proname ~ '^(abc)$'::text) +(3 rows) + +explain (costs off) select * from pg_proc where proname ~ '^(abc)?d'; + QUERY PLAN +---------------------------------------- + Seq Scan on pg_proc + Filter: (proname ~ '^(abc)?d'::text) +(2 rows) + diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql index 9fdcb2f5bd5..c29ed05d768 100644 --- a/src/test/regress/sql/regex.sql +++ b/src/test/regress/sql/regex.sql @@ -24,3 +24,13 @@ select 'abc abc abd' ~ '^(.+)( \1)+$' as f; select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)'); select substring('a' from '((a))+'); select substring('a' from '((a)+)'); + +-- Test conversion of regex patterns to indexable conditions +explain (costs off) select * from pg_proc where proname ~ 'abc'; +explain (costs off) select * from pg_proc where proname ~ '^abc'; +explain (costs off) select * from pg_proc where proname ~ '^abc$'; +explain (costs off) select * from pg_proc where proname ~ '^abcd*e'; +explain (costs off) select * from pg_proc where proname ~ '^abc+d'; +explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)'; +explain (costs off) select * from pg_proc where proname ~ '^(abc)$'; +explain (costs off) select * from pg_proc where proname ~ '^(abc)?d'; -- cgit v1.2.3