This supports the triconsistent function for pg_trgm GIN opclass
author Teodor Sigaev <teodor@sigaev.ru>
Mon, 20 Jul 2015 15:18:48 +0000 (18:18 +0300)
committer Teodor Sigaev <teodor@sigaev.ru>
Mon, 20 Jul 2015 15:18:48 +0000 (18:18 +0300)
to make it faster to execute indexed queries where some keys are
common and some are rare.

Patch by Jeff Janes

contrib/pg_trgm/Makefile
contrib/pg_trgm/pg_trgm--1.1--1.2.sql [new file with mode: 0644]
contrib/pg_trgm/pg_trgm--1.2.sql [moved from contrib/pg_trgm/pg_trgm--1.1.sql with 92% similarity]
contrib/pg_trgm/pg_trgm.control
contrib/pg_trgm/trgm_gin.c

index e081a1e5e9fea35e8dfc26853e2603030b10b8cf..1e38753622131dde28856f637f10596e7541d4d7 100644 (file)
@@ -4,7 +4,7 @@ MODULE_big = pg_trgm
 OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES)
 
 EXTENSION = pg_trgm
-DATA = pg_trgm--1.1.sql pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql
+DATA = pg_trgm--1.2.sql pg_trgm--1.0--1.1.sql pg_trgm--1.1--1.2.sql pg_trgm--unpackaged--1.0.sql
 PGFILEDESC = "pg_trgm - trigram matching"
 
 REGRESS = pg_trgm
diff --git a/contrib/pg_trgm/pg_trgm--1.1--1.2.sql b/contrib/pg_trgm/pg_trgm--1.1--1.2.sql
new file mode 100644 (file)
index 0000000..c101f21
--- /dev/null
@@ -0,0 +1,12 @@
+/* contrib/pg_trgm/pg_trgm--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.2'" to load this file. \quit
+
+CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal)
+RETURNS "char"
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+        FUNCTION        6    (text, text)   gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal);
similarity index 92%
rename from contrib/pg_trgm/pg_trgm--1.1.sql
rename to contrib/pg_trgm/pg_trgm--1.2.sql
index 34b37e478721a57fd43ced0ccaf7fa90e60839e3..03d46d07f98187c6c0e5ef520bc4d47f61ce22f4 100644 (file)
@@ -1,4 +1,4 @@
-/* contrib/pg_trgm/pg_trgm--1.1.sql */
+/* contrib/pg_trgm/pg_trgm--1.2.sql */
 
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit
@@ -176,3 +176,13 @@ ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
 ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
         OPERATOR        5       pg_catalog.~ (text, text),
         OPERATOR        6       pg_catalog.~* (text, text);
+
+-- Add functions that are new in 9.6 (pg_trgm 1.2).
+
+CREATE FUNCTION gin_trgm_triconsistent(internal, int2, text, int4, internal, internal, internal)
+RETURNS "char"
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
+        FUNCTION        6      (text,text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal);
index 2ac51e6890c306184ad62e2e5bc8e6fdbe83dfee..cbf5a186d7e9c50311b0a489b73f958935dad9d9 100644 (file)
@@ -1,5 +1,5 @@
 # pg_trgm extension
 comment = 'text similarity measurement and index searching based on trigrams'
-default_version = '1.1'
+default_version = '1.2'
 module_pathname = '$libdir/pg_trgm'
 relocatable = true
index d524ceaa19e57c3ecf907e7fc4403603f58279a3..6a0731d44ea3432c626dd4dbdf9fd73eb217914e 100644 (file)
@@ -14,6 +14,7 @@ PG_FUNCTION_INFO_V1(gin_extract_trgm);
 PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
 PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
 PG_FUNCTION_INFO_V1(gin_trgm_consistent);
+PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
 
 /*
  * This function can only be called if a pre-9.1 version of the GIN operator
@@ -235,3 +236,94 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
 
    PG_RETURN_BOOL(res);
 }
+
+/*
+ * Ternary variant of the consistent function.  In all cases, GIN_TRUE is at
+ * least as favorable to inclusion as GIN_MAYBE, so where no better option is
+ * available GIN_MAYBE is treated as if it were GIN_TRUE and the same test as
+ * the binary consistent function is applied.  Never returns GIN_TRUE.
+ */
+Datum
+gin_trgm_triconsistent(PG_FUNCTION_ARGS)
+{
+   GinTernaryValue  *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+   StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+   /* text    *query = PG_GETARG_TEXT_P(2); */
+   int32       nkeys = PG_GETARG_INT32(3);
+   Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+   GinTernaryValue res = GIN_MAYBE;
+   int32       i,
+               ntrue;
+   bool       *boolcheck;
+
+   switch (strategy)
+   {
+       case SimilarityStrategyNumber:
+           /* Count the matches, treating GIN_MAYBE keys as matching */
+           ntrue = 0;
+           for (i = 0; i < nkeys; i++)
+           {
+               if (check[i] != GIN_FALSE)
+                   ntrue++;
+           }
+#ifdef DIVUNION
+           res = (nkeys == ntrue) ? GIN_MAYBE : (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
+#else
+           res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
+#endif
+           break;
+       case ILikeStrategyNumber:
+#ifndef IGNORECASE
+           elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
+#endif
+           /* FALL THRU */
+       case LikeStrategyNumber:
+           /* Check if all extracted trigrams are present (or may be). */
+           res = GIN_MAYBE;
+           for (i = 0; i < nkeys; i++)
+           {
+               if (check[i] == GIN_FALSE)
+               {
+                   res = GIN_FALSE;
+                   break;
+               }
+           }
+           break;
+       case RegExpICaseStrategyNumber:
+#ifndef IGNORECASE
+           elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+           /* FALL THRU */
+       case RegExpStrategyNumber:
+           if (nkeys < 1)
+           {
+               /* Regex processing gave no result: do full index scan */
+               res = GIN_MAYBE;
+           }
+           else
+           {
+               /*
+                * As trigramsMatchGraph implements a monotonic boolean function,
+                * promoting all GIN_MAYBE keys to GIN_TRUE will give a
+                * conservative result.
+                */
+               boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
+               for (i = 0; i < nkeys; i++)
+                   boolcheck[i] = (check[i] != GIN_FALSE);
+               if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
+                                       boolcheck))
+                   res = GIN_FALSE;
+               pfree(boolcheck);
+           }
+           break;
+       default:
+           elog(ERROR, "unrecognized strategy number: %d", strategy);
+           res = GIN_FALSE;        /* keep compiler quiet */
+           break;
+   }
+
+   /* All cases served by this function are inexact */
+   Assert(res != GIN_TRUE);
+   PG_RETURN_GIN_TERNARY_VALUE(res);
+}