Fix bugs in contrib/pg_trgm's LIKE pattern analysis code.

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 20 Aug 2012 17:24:52 +0000 (13:24 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 20 Aug 2012 17:25:42 +0000 (13:25 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 20 Aug 2012 17:24:52 +0000 (13:24 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 20 Aug 2012 17:25:42 +0000 (13:25 -0400)
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out

index e7af7d48902e0c88f7720d5d4dd211cf454288af..81d0ca80b206394799de4a3f1a9bdaba0f124364 100644 (file)
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -3497,6 +3497,12 @@ select * from test2 where t like '%bcd%';
   abcdef
  (1 row)
  
+select * from test2 where t like E'%\\bcd%';
+   t    
+--------
+ abcdef
+(1 row)
+
  select * from test2 where t ilike '%BCD%';
     t    
  --------
@@ -3539,6 +3545,12 @@ select * from test2 where t like '%bcd%';
   abcdef
  (1 row)
  
+select * from test2 where t like E'%\\bcd%';
+   t    
+--------
+ abcdef
+(1 row)
+
  select * from test2 where t ilike '%BCD%';
     t    
  --------
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql

index ea902f602f9271bb54803ba1a382b5f916560bc5..81ab1e79b17df0c5855b10e01298ab765e95d94d 100644 (file)
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -49,6 +49,7 @@ explain (costs off)
    select * from test2 where t ilike '%BCD%';
  select * from test2 where t like '%BCD%';
  select * from test2 where t like '%bcd%';
+select * from test2 where t like E'%\\bcd%';
  select * from test2 where t ilike '%BCD%';
  select * from test2 where t ilike 'qua%';
  drop index test2_idx_gin;
@@ -60,5 +61,6 @@ explain (costs off)
    select * from test2 where t ilike '%BCD%';
  select * from test2 where t like '%BCD%';
  select * from test2 where t like '%bcd%';
+select * from test2 where t like E'%\\bcd%';
  select * from test2 where t ilike '%BCD%';
  select * from test2 where t ilike 'qua%';
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c

index 4e32c6f654c164206fb293fcb3d76f642ed72d38..87dffd1dd2c9b773c91c1243e96f6483735e601d 100644 (file)
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -272,33 +272,36 @@ get_wildcard_part(const char *str, int lenstr,
     const char *beginword = str;
     const char *endword;
     char       *s = buf;
-   bool        in_wildcard_meta = false;
+   bool        in_leading_wildcard_meta = false;
+   bool        in_trailing_wildcard_meta = false;
     bool        in_escape = false;
     int         clen;
  
     /*
-    * Find the first word character remembering whether last character was
-    * wildcard meta-character.
+    * Find the first word character, remembering whether preceding character
+    * was wildcard meta-character.  Note that the in_escape state persists
+    * from this loop to the next one, since we may exit at a word character
+    * that is in_escape.
      */
     while (beginword - str < lenstr)
     {
         if (in_escape)
         {
-           in_escape = false;
-           in_wildcard_meta = false;
             if (iswordchr(beginword))
                 break;
+           in_escape = false;
+           in_leading_wildcard_meta = false;
         }
         else
         {
             if (ISESCAPECHAR(beginword))
                 in_escape = true;
             else if (ISWILDCARDCHAR(beginword))
-               in_wildcard_meta = true;
+               in_leading_wildcard_meta = true;
             else if (iswordchr(beginword))
                 break;
             else
-               in_wildcard_meta = false;
+               in_leading_wildcard_meta = false;
         }
         beginword += pg_mblen(beginword);
     }
@@ -310,11 +313,11 @@ get_wildcard_part(const char *str, int lenstr,
         return NULL;
  
     /*
-    * Add left padding spaces if last character wasn't wildcard
+    * Add left padding spaces if preceding character wasn't wildcard
      * meta-character.
      */
     *charlen = 0;
-   if (!in_wildcard_meta)
+   if (!in_leading_wildcard_meta)
     {
         if (LPADDING > 0)
         {
@@ -333,15 +336,11 @@ get_wildcard_part(const char *str, int lenstr,
      * string boundary.  Strip escapes during copy.
      */
     endword = beginword;
-   in_wildcard_meta = false;
-   in_escape = false;
     while (endword - str < lenstr)
     {
         clen = pg_mblen(endword);
         if (in_escape)
         {
-           in_escape = false;
-           in_wildcard_meta = false;
             if (iswordchr(endword))
             {
                 memcpy(s, endword, clen);
@@ -349,7 +348,17 @@ get_wildcard_part(const char *str, int lenstr,
                 s += clen;
             }
             else
+           {
+               /*
+                * Back up endword to the escape character when stopping at
+                * an escaped char, so that subsequent get_wildcard_part will
+                * restart from the escape character.  We assume here that
+                * escape chars are single-byte.
+                */
+               endword--;
                 break;
+           }
+           in_escape = false;
         }
         else
         {
@@ -357,7 +366,7 @@ get_wildcard_part(const char *str, int lenstr,
                 in_escape = true;
             else if (ISWILDCARDCHAR(endword))
             {
-               in_wildcard_meta = true;
+               in_trailing_wildcard_meta = true;
                 break;
             }
             else if (iswordchr(endword))
@@ -367,19 +376,16 @@ get_wildcard_part(const char *str, int lenstr,
                 s += clen;
             }
             else
-           {
-               in_wildcard_meta = false;
                 break;
-           }
         }
         endword += clen;
     }
  
     /*
-    * Add right padding spaces if last character wasn't wildcard
+    * Add right padding spaces if next character isn't wildcard
      * meta-character.
      */
-   if (!in_wildcard_meta)
+   if (!in_trailing_wildcard_meta)
     {
         if (RPADDING > 0)
         {
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 20 Aug 2012 17:24:52 +0000 (13:24 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 20 Aug 2012 17:25:42 +0000 (13:25 -0400)
contrib/pg_trgm/expected/pg_trgm.out		patch | blob | blame | history
contrib/pg_trgm/sql/pg_trgm.sql		patch | blob | blame | history
contrib/pg_trgm/trgm_op.c		patch | blob | blame | history