From 2c0556068fc308ed9cce06c85de7e42305d34b86 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 15 May 2003 15:50:21 +0000 Subject: Indexing support for pattern matching operations via separate operator class when lc_collate is not C. --- doc/src/sgml/charset.sgml | 16 +++------------- doc/src/sgml/indices.sgml | 45 ++++++++++++++++++++++++++++++++++++++++++++- doc/src/sgml/release.sgml | 3 ++- doc/src/sgml/runtime.sgml | 29 ++++++++--------------------- 4 files changed, 57 insertions(+), 36 deletions(-) (limited to 'doc/src/sgml') diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 8c2af90770d..213f3a8f62b 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -1,4 +1,4 @@ - + Localization</> @@ -213,23 +213,13 @@ initdb --locale=sv_SE The <function>to_char</> family of functions </para> </listitem> - - <listitem> - <para> - The <literal>LIKE</> and <literal>~</> operators for pattern - matching - </para> - </listitem> </itemizedlist> </para> <para> The only severe drawback of using the locale support in - <productname>PostgreSQL</> is its speed. So use locales only if you - actually need it. It should be noted in particular that selecting - a non-C locale disables index optimizations for <literal>LIKE</> and - <literal>~</> operators, which can make a huge difference in the - speed of searches that use those operators. + <productname>PostgreSQL</> is its speed. So use locales only if + you actually need them. </para> </sect2> diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index d900b941fa9..fcd7108a14c 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -1,4 +1,4 @@ -<!-- $Header: /cvsroot/pgsql/doc/src/sgml/indices.sgml,v 1.40 2003/03/25 16:15:36 petere Exp $ --> +<!-- $Header: /cvsroot/pgsql/doc/src/sgml/indices.sgml,v 1.41 2003/05/15 15:50:18 petere Exp $ --> <chapter id="indexes"> <title id="indexes-title">Indexes @@ -132,6 +132,19 @@ CREATE INDEX test1_id_index ON test1 (id); + + The optimizer can also use a B-tree index for queries involving the + pattern matching operators LIKE, + ILIKE, ~, and + ~*, if the pattern is + anchored to the beginning of the string, e.g., col LIKE + 'foo%' or col ~ '^foo', but not + col LIKE '%bar'. However, if your server does + not use the C locale you will need to create the index with a + special operator class. See + below. + + indexes @@ -405,6 +418,36 @@ CREATE INDEX name ON table bigbox_ops. + + + + The operator classes text_pattern_ops, + varchar_pattern_ops, + bpchar_pattern_ops, and + name_pattern_ops support B-tree indexes on + the types text, varchar, + char, and name, respectively. The + difference to the ordinary operator classes is that the values + are compared strictly character by character rather than + according to the locale-specific collation rules. This makes + these operator classes suitable for use by queries involving + pattern matching expressions (LIKE or POSIX + regular expressions) if the server does not use the standard + C locale. As an example, to index a + varchar column like this: + +CREATE INDEX test_index ON test_table (col varchar_pattern_ops); + + If you do use the C locale, you should instead create an index + with the default operator class. Also note that you should + create an index with the default operator class if you want + queries involving ordinary comparisons to use an index. Such + queries cannot use the + xxx_pattern_ops + operator classes. It is possible, however, to create multiple + indexes on the same column with different operator classes. + + diff --git a/doc/src/sgml/release.sgml b/doc/src/sgml/release.sgml index 9332ac499b4..9d9b758e89c 100644 --- a/doc/src/sgml/release.sgml +++ b/doc/src/sgml/release.sgml @@ -1,5 +1,5 @@ @@ -24,6 +24,7 @@ CDATA means the content is "SGML-free", so you can write without worries about funny characters. --> @@ -133,26 +133,13 @@ postgres$ initdb -D /usr/local/pgsql/data - initdb also initializes the default localelocale for - the database cluster. Normally, it will just take the locale - settings in the environment and apply them to the initialized - database. It is possible to specify a different locale for the - database; more information about that can be found in . One surprise you might encounter while running - initdb is a notice similar to this: - -The database cluster will be initialized with locale de_DE. -This locale setting will prevent the use of indexes for pattern matching -operations. If that is a concern, rerun initdb with the collation order -set to "C". For more information see the documentation. - - This is intended to warn you that the currently selected locale - will cause indexes to be sorted in an order that prevents them from - being used for LIKE and regular-expression searches. If you need - good performance in such searches, you should set your current - locale to C and re-run initdb, e.g., - by running initdb --lc-collate=C. The sort - order used within a particular database cluster is set by + initdb also initializes the default + localelocale for the database cluster. + Normally, it will just take the locale settings in the environment + and apply them to the initialized database. It is possible to + specify a different locale for the database; more information about + that can be found in . The sort order used + within a particular database cluster is set by initdb and cannot be changed later, short of dumping all data, rerunning initdb, and reloading the data. So it's important to make this choice correctly -- cgit v1.2.3