From ab9907f5e5eba6e4e17a279d07a1a9df21ec5b19 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 29 Jul 2008 18:31:20 +0000
Subject: Add a new, improved version of citext as a contrib module.
David E. Wheeler
---
doc/src/sgml/citext.sgml | 222 +++++++++++++++++++++++++++++++++++++++++++++
doc/src/sgml/contrib.sgml | 3 +-
doc/src/sgml/filelist.sgml | 3 +-
3 files changed, 226 insertions(+), 2 deletions(-)
create mode 100644 doc/src/sgml/citext.sgml
(limited to 'doc/src')
diff --git a/doc/src/sgml/citext.sgml b/doc/src/sgml/citext.sgml
new file mode 100644
index 00000000000..cb91da8897b
--- /dev/null
+++ b/doc/src/sgml/citext.sgml
@@ -0,0 +1,222 @@
+
+
+
+ citext
+
+
+ citext
+
+
+
+ The citext> module provides a case-insensitive
+ character string type, citext>. Essentially, it internally calls
+ lower> when comparing values. Otherwise, it behaves almost
+ exactly like text>.
+
+
+
+ Rationale
+
+
+ The standard approach to doing case-insensitive matches
+ in PostgreSQL> has been to use the lower>
+ function when comparing values, for example
+
+
+ SELECT * FROM tab WHERE lower(col) = LOWER(?);
+
+
+
+
+ This works reasonably well, but has a number of drawbacks:
+
+
+
+
+
+ It makes your SQL statements verbose, and you always have to remember to
+ use lower> on both the column and the query value.
+
+
+
+
+ It won't use an index, unless you create a functional index using
+ lower>.
+
+
+
+
+ If you declare a column as UNIQUE> or PRIMARY
+ KEY>, the implicitly generated index is case-sensitive. So it's
+ useless for case-insensitive searches, and it won't enforce
+ uniqueness case-insensitively.
+
+
+
+
+
+ The citext> data type allows you to eliminate calls
+ to lower> in SQL queries, and allows a primary key to
+ be case-insensitive. citext> is locale-aware, just
+ like text>, which means that the comparison of uppercase and
+ lowercase characters is dependent on the rules of
+ the LC_CTYPE> locale setting. Again, this behavior is
+ identical to the use of lower> in queries. But because it's
+ done transparently by the datatype, you don't have to remember to do
+ anything special in your queries.
+
+
+
+
+
+ How to Use It
+
+
+ Here's a simple example of usage:
+
+
+ CREATE TABLE users (
+ nick CITEXT PRIMARY KEY,
+ pass TEXT NOT NULL
+ );
+
+ INSERT INTO users VALUES ( 'larry', md5(random()::text) );
+ INSERT INTO users VALUES ( 'Tom', md5(random()::text) );
+ INSERT INTO users VALUES ( 'Damian', md5(random()::text) );
+ INSERT INTO users VALUES ( 'NEAL', md5(random()::text) );
+ INSERT INTO users VALUES ( 'Bjørn', md5(random()::text) );
+
+ SELECT * FROM users WHERE nick = 'Larry';
+
+
+ The SELECT> statement will return one tuple, even though
+ the nick> column was set to larry> and the query
+ was for Larry>.
+
+
+
+
+ Limitations
+
+
+
+
+ citext>'s behavior depends on
+ the LC_CTYPE> setting of your database. How it compares
+ values is therefore determined when
+ initdb> is run to create the cluster. It is not truly
+ case-insensitive in the terms defined by the Unicode standard.
+ Effectively, what this means is that, as long as you're happy with your
+ collation, you should be happy with citext>'s comparisons. But
+ if you have data in different languages stored in your database, users
+ of one language may find their query results are not as expected if the
+ collation is for another language.
+
+
+
+
+
+ The pattern-matching comparison operators, such as LIKE>,
+ ~>, ~~>, !~>, !~~>, etc.,
+ have been overloaded to make case-insensitive comparisons when their
+ left-hand argument is of type citext>. However, related
+ functions have not been changed, including:
+
+
+
+
+
+ regexp_replace()>
+
+
+
+
+ regexp_split_to_array()>
+
+
+
+
+ regexp_split_to_table()>
+
+
+
+
+ replace()>
+
+
+
+
+ split_part()>
+
+
+
+
+ strpos()>
+
+
+
+
+ translate()>
+
+
+
+
+
+ Of course, for the regular expression functions, you can specify
+ case-insensitive comparisons in their flags> arguments, but
+ the same cannot be done for the the non-regexp functions.
+
+
+
+
+
+ citext> is not as efficient as text> because the
+ operator functions and the btree comparison functions must make copies
+ of the data and convert it to lower case for comparisons. It is,
+ however, slightly more efficient than using lower> to get
+ case-insensitive matching.
+
+
+
+
+
+ PostgreSQL> supports casting between text>
+ and any other type (even non-string types) by using the other type's
+ I/O functions. This doesn't work with citext>. However,
+ you can cast via I/O functions in two steps, for example
+ somevalue>::text::citext or
+ citextvalue>::text::sometype>.
+
+
+
+
+
+ citext> doesn't help much if you need data to compare
+ case-sensitively in some contexts and case-insensitively in other
+ contexts. The standard answer is to use the text> type and
+ manually use the lower> function when you need to compare
+ case-insensitively; this works all right if case-insensitive comparison
+ is needed only infrequently. If you need case-insensitive most of
+ the time and case-sensitive infrequently, consider storing the data
+ as citext> and explicitly casting the column to text>
+ when you want case-sensitive comparison. In either situation, you
+ will need two indexes if you want both types of searches to be fast.
+
+
+
+
+
+
+ Author
+
+
+ David E. Wheeler david@kineticode.com
+
+
+
+ Inspired by the original citext> module by Donald Fraser.
+
+
+
+
+
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index b9d78c1c76b..bac5044205b 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -1,4 +1,4 @@
-
+
Additional Supplied Modules
@@ -81,6 +81,7 @@ psql -d dbname -f SHAREDIR>/contrib/module>.sql
&adminpack;
&btree-gist;
&chkpass;
+ &citext;
&cube;
&dblink;
&dict-int;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index b6305dc535e..32aa90400bb 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -1,4 +1,4 @@
-
+
@@ -94,6 +94,7 @@
+
--
cgit v1.2.3