Go back to using a separate method for doing ILIKE for single-byte encodings

author Andrew Dunstan <andrew@dunslane.net>

Sat, 22 Sep 2007 03:58:34 +0000 (03:58 +0000)

committer Andrew Dunstan <andrew@dunslane.net>

Sat, 22 Sep 2007 03:58:34 +0000 (03:58 +0000)
author Andrew Dunstan <andrew@dunslane.net>
Sat, 22 Sep 2007 03:58:34 +0000 (03:58 +0000)
committer Andrew Dunstan <andrew@dunslane.net>
Sat, 22 Sep 2007 03:58:34 +0000 (03:58 +0000)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c

index 1603a4e61ae786d882db6a0b8fed800f2e06e0ae..4c4ca2c19363e59d67fde32ee341aa30579a8589 100644 (file)
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -11,7 +11,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *     $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.70 2007/09/21 22:52:52 tgl Exp $
+ *     $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.71 2007/09/22 03:58:34 adunstan Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -36,6 +36,8 @@ static text *MB_do_like_escape(text *, text *);
  
  static int     UTF8_MatchText(char *t, int tlen, char *p, int plen);
  
+static int     SB_IMatchText(char *t, int tlen, char *p, int plen);
+
  static int     GenericMatchText(char *s, int slen, char* p, int plen);
  static int     Generic_Text_IC_like(text *str, text *pat);
  
@@ -104,6 +106,12 @@ wchareq(char *p1, char *p2)
  
  #include "like_match.c"
  
+/* setup to compile like_match.c for single byte case insensitive matches */
+#define MATCH_LOWER
+#define NextChar(p, plen) NextByte((p), (plen))
+#define MatchText SB_IMatchText
+
+#include "like_match.c"
  
  /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
  
@@ -132,16 +140,33 @@ Generic_Text_IC_like(text *str, text *pat)
         int                     slen,
                                 plen;
  
-       /* Force inputs to lower case to achieve case insensitivity */
-       str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
-       pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat)));
-       /* lower's result is never packed, so OK to use old macros here */
-       s = VARDATA(str);
-       slen = (VARSIZE(str) - VARHDRSZ);
-       p = VARDATA(pat);
-       plen = (VARSIZE(pat) - VARHDRSZ);
+       /* For efficiency reasons, in the single byte case we don't call
+        * lower() on the pattern and text, but instead call tolower() on each
+        * character.  In the multi-byte case we don't have much choice :-(
+        */
  
-       return GenericMatchText(s, slen, p, plen);
+       if (pg_database_encoding_max_length() > 1)
+       {
+               /* lower's result is never packed, so OK to use old macros here */
+               pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat)));
+               p = VARDATA(pat);
+               plen = (VARSIZE(pat) - VARHDRSZ);
+               str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
+               s = VARDATA(str);
+               slen = (VARSIZE(str) - VARHDRSZ);
+               if (GetDatabaseEncoding() == PG_UTF8)
+                       return UTF8_MatchText(s, slen, p, plen);
+               else
+                       return MB_MatchText(s, slen, p, plen);
+       }
+       else
+       {
+               p = VARDATA_ANY(pat);
+               plen = VARSIZE_ANY_EXHDR(pat);
+               s = VARDATA_ANY(str);
+               slen = VARSIZE_ANY_EXHDR(str);
+               return SB_IMatchText(s, slen, p, plen);
+       }
  }
  
  /*
diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c

index 7ab29623f3c9985856503d2aa67176b6773f8e11..f2ee0bae0ec434c19dd329e413fe409e1573c895 100644 (file)
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -3,8 +3,9 @@
   * like_match.c
   *       like expression handling internal code.
   *
- * This file is included by like.c three times, to provide natching code for
- * single-byte encodings, UTF8, and for other multi-byte encodings.
+ * This file is included by like.c four times, to provide matching code for
+ * single-byte encodings, UTF8, and for other multi-byte encodings,
+ * and case insensitive matches for single byte encodings.
   * UTF8 is a special case because we can use a much more efficient version
   * of NextChar than can be used for other multi-byte encodings.
   *
@@ -13,11 +14,12 @@
   * NextChar 
   * MatchText - to name of function wanted
   * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
+ * MATCH_LOWER - define iff using tolower on text and pattern chars
   *
   * Copyright (c) 1996-2007, PostgreSQL Global Development Group
   *
   * IDENTIFICATION
- *     $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.17 2007/09/21 22:52:52 tgl Exp $
+ *     $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.18 2007/09/22 03:58:34 adunstan Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -68,6 +70,12 @@
   *--------------------
   */
  
+#ifdef MATCH_LOWER
+#define TCHAR(t) tolower((t))
+#else
+#define TCHAR(t) (t)
+#endif
+
  static int
  MatchText(char *t, int tlen, char *p, int plen)
  {
@@ -143,13 +151,13 @@ MatchText(char *t, int tlen, char *p, int plen)
                         else
                         {
  
-                               char firstpat = *p ;
+                               char firstpat = TCHAR(*p) ;
  
                                 if (*p == '\\')
                                 {
                                         if (plen < 2)
                                                 return LIKE_FALSE;
-                                       firstpat = p[1];
+                                       firstpat = TCHAR(p[1]);
                                 }
  
                                 while (tlen > 0)
@@ -158,7 +166,7 @@ MatchText(char *t, int tlen, char *p, int plen)
                                          * Optimization to prevent most recursion: don't recurse
                                          * unless first pattern byte matches first text byte.
                                          */
-                                       if (*t == firstpat)
+                                       if (TCHAR(*t) == firstpat)
                                         {
                                                 int                     matched = MatchText(t, tlen, p, plen);
                                                 
@@ -183,7 +191,7 @@ MatchText(char *t, int tlen, char *p, int plen)
                         NextByte(p, plen);
                         continue;
                 }
-               else if (*t != *p)
+               else if (TCHAR(*t) != TCHAR(*p))
                 {
                         /*
                          * Not the single-character wildcard and no explicit match? Then
@@ -338,3 +346,8 @@ do_like_escape(text *pat, text *esc)
  #undef do_like_escape
  #endif
  
+#undef TCHAR
+
+#ifdef MATCH_LOWER
+#undef MATCH_LOWER
+#endif
author	Andrew Dunstan <andrew@dunslane.net>
	Sat, 22 Sep 2007 03:58:34 +0000 (03:58 +0000)
committer	Andrew Dunstan <andrew@dunslane.net>
	Sat, 22 Sep 2007 03:58:34 +0000 (03:58 +0000)
src/backend/utils/adt/like.c		patch \| blob \| blame \| history
src/backend/utils/adt/like_match.c		patch \| blob \| blame \| history