Enhanced containment selectivity function for /contrib/ltree
author Bruce Momjian <bruce@momjian.us>
Wed, 26 Apr 2006 18:28:34 +0000 (18:28 +0000)
committer Bruce Momjian <bruce@momjian.us>
Wed, 26 Apr 2006 18:28:34 +0000 (18:28 +0000)
Matteo Beccati

contrib/ltree/ltree.sql.in
src/backend/utils/adt/geo_selfuncs.c
src/backend/utils/adt/selfuncs.c
src/include/catalog/pg_proc.h
src/include/utils/selfuncs.h

index 8e7d5680a847d082b16760d426bb01cc26e01b12..bdb8bdf52cbe9f13475d8e3c162b67d33620cb9b 100644 (file)
@@ -230,7 +230,7 @@ CREATE OPERATOR @> (
    RIGHTARG = ltree,
    PROCEDURE = ltree_isparent,
         COMMUTATOR = '<@',
-        RESTRICT = contsel,
+        RESTRICT = parentsel,
    JOIN = contjoinsel
 );
 
@@ -248,7 +248,7 @@ CREATE OPERATOR <@ (
    RIGHTARG = ltree,
    PROCEDURE = ltree_risparent,
         COMMUTATOR = '@>',
-        RESTRICT = contsel,
+        RESTRICT = parentsel,
    JOIN = contjoinsel
 );
 
index dd652041169f093b4d8f4c736704c92c4d837028..1bc7f0679dd2727287cd485ae11c220c77b9f562 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/geo_selfuncs.c,v 1.27 2006/03/05 15:58:42 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/geo_selfuncs.c,v 1.28 2006/04/26 18:28:29 momjian Exp $
  *
  * XXX These are totally bogus.  Perhaps someone will make them do
  * something reasonable, someday.
@@ -20,7 +20,6 @@
 
 #include "utils/geo_decls.h"
 
-
 /*
  * Selectivity functions for geometric operators.  These are bogus -- unless
  * we know the actual key distribution in the index, we can't make a good
@@ -93,3 +92,4 @@ contjoinsel(PG_FUNCTION_ARGS)
 {
    PG_RETURN_FLOAT8(0.001);
 }
+
index 3eae8171e9bc59bcef05774f796348eab44c77be..810a8f6db1f6a8742c01809c1adfa3cd7e389861 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.199 2006/04/20 17:50:18 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.200 2006/04/26 18:28:29 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -4852,3 +4852,171 @@ gistcostestimate(PG_FUNCTION_ARGS)
 
    PG_RETURN_VOID();
 }
+
+
+#define DEFAULT_PARENT_SEL 0.001
+
+/*
+ * parentsel - Restriction selectivity for the ltree parent/child operators.
+ *
+ * Takes the usual restriction-estimator arguments (planner info, operator
+ * OID, clause argument list, varRelid) and returns a float8 selectivity.
+ *
+ * When a variable with statistics is compared to a non-null constant, the
+ * operator is run against every most-common value and every histogram
+ * entry.  Matching MCV frequencies are summed directly; the matching
+ * fraction of the histogram is scaled to the rows that are neither NULL
+ * nor covered by the MCV list.  A NULL constant yields 0.0; every other
+ * case falls back to DEFAULT_PARENT_SEL.
+ */
+Datum
+parentsel(PG_FUNCTION_ARGS)
+{
+   PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+   Oid         operator = PG_GETARG_OID(1);
+   List       *args = (List *) PG_GETARG_POINTER(2);
+   int         varRelid = PG_GETARG_INT32(3);
+   VariableStatData vardata;
+   Node       *other;
+   bool        varonleft;
+   double      selec = DEFAULT_PARENT_SEL;
+
+   /*
+    * If expression is not variable OP something or something OP variable,
+    * then punt and return a default estimate.
+    */
+   if (!get_restriction_variable(root, args, varRelid,
+                                 &vardata, &other, &varonleft))
+       PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);
+
+   /*
+    * If the something is a NULL constant, assume operator is strict and
+    * return zero, ie, operator will never return TRUE.
+    */
+   if (IsA(other, Const) &&
+       ((Const *) other)->constisnull)
+   {
+       ReleaseVariableStats(vardata);
+       PG_RETURN_FLOAT8(0.0);
+   }
+
+   /* lacking statistics or a constant, the default estimate stands */
+   if (HeapTupleIsValid(vardata.statsTuple) && IsA(other, Const))
+   {
+       /* Variable is being compared to a known non-null constant */
+       Form_pg_statistic stats;
+       Datum       constval = ((Const *) other)->constvalue;
+       FmgrInfo    contproc;
+       Datum      *values;
+       int         nvalues;
+       float4     *numbers;
+       int         nnumbers;
+       double      mcvsum = 0.0;
+       double      mcvsel = 0.0;
+       double      hissel = 0.0;
+       double      otherfrac;
+       bool        match;
+       int         i;
+
+       stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+
+       /* look up the operator's function once; both scans below use it */
+       fmgr_info(get_opcode(operator), &contproc);
+
+       /*
+        * Does the operator hold between the constant and any of the
+        * column's most common values?
+        */
+       if (get_attstatsslot(vardata.statsTuple,
+                            vardata.atttype, vardata.atttypmod,
+                            STATISTIC_KIND_MCV, InvalidOid,
+                            &values, &nvalues,
+                            &numbers, &nnumbers))
+       {
+           for (i = 0; i < nvalues; i++)
+           {
+               /* be careful to apply operator right way 'round */
+               if (varonleft)
+                   match = DatumGetBool(FunctionCall2(&contproc,
+                                                      values[i],
+                                                      constval));
+               else
+                   match = DatumGetBool(FunctionCall2(&contproc,
+                                                      constval,
+                                                      values[i]));
+
+               /* calculate total selectivity of all most-common-values */
+               mcvsum += numbers[i];
+
+               /* calculate selectivity of matching most-common-values */
+               if (match)
+                   mcvsel += numbers[i];
+           }
+
+           /* release the frequencies array as well as the values */
+           free_attstatsslot(vardata.atttype, values, nvalues,
+                             numbers, nnumbers);
+       }
+
+       /*
+        * For what fraction of the column's histogram values does the
+        * operator hold?
+        */
+       if (get_attstatsslot(vardata.statsTuple,
+                            vardata.atttype, vardata.atttypmod,
+                            STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                            &values, &nvalues,
+                            NULL, NULL))
+       {
+           int         nmatch = 0;
+
+           for (i = 0; i < nvalues; i++)
+           {
+               /* be careful to apply operator right way 'round */
+               if (varonleft)
+                   match = DatumGetBool(FunctionCall2(&contproc,
+                                                      values[i],
+                                                      constval));
+               else
+                   match = DatumGetBool(FunctionCall2(&contproc,
+                                                      constval,
+                                                      values[i]));
+
+               /* count matching histogram values */
+               if (match)
+                   nmatch++;
+           }
+
+           /* matching entries over total entries; skip an empty slot */
+           if (nvalues > 0)
+               hissel = (double) nmatch / nvalues;
+
+           free_attstatsslot(vardata.atttype, values, nvalues, NULL, 0);
+       }
+
+       /*
+        * The histogram covers only the rows that are neither NULL nor in
+        * the MCV list, so scale its result to that share of the table
+        * before adding the MCV matches.
+        */
+       otherfrac = 1.0 - stats->stanullfrac - mcvsum;
+       if (otherfrac < 0.0)
+           otherfrac = 0.0;    /* roundoff in the stored fractions */
+       selec = mcvsel + hissel * otherfrac;
+
+       /*
+        * don't return 0.0 selectivity unless NULLs and most-common-values
+        * account for the whole column
+        */
+       if (selec == 0.0 && otherfrac > 0.0)
+           selec = DEFAULT_PARENT_SEL;
+   }
+
+   ReleaseVariableStats(vardata);
+
+   /* result should be in range, but make sure... */
+   CLAMP_PROBABILITY(selec);
+
+   PG_RETURN_FLOAT8((float8) selec);
+}
+
index 61339eadadc9a299c3a655f2f1e8c2d6389f1804..53bcf3f4e36fd2231baeda2eccbceef62a85808d 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.406 2006/04/25 00:25:20 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.407 2006/04/26 18:28:30 momjian Exp $
  *
  * NOTES
  *   The script catalog/genbki.sh reads this file and generates .bki
@@ -3812,6 +3812,8 @@ DATA(insert OID = 2591 (  gist_circle_consistent PGNSP PGUID 12 f f t f i 3 16 "
 DESCR("GiST support");
 DATA(insert OID = 2592 (  gist_circle_compress PGNSP PGUID 12 f f t f i 1 2281 "2281" _null_ _null_ _null_ gist_circle_compress - _null_ ));
 DESCR("GiST support");
+DATA(insert OID = 2599 (  parentsel              PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ parentsel - _null_ ));
+DESCR("enhanced restriction selectivity for ltree isparent comparison operators");
 
 
 /*
index 7094e78756c5946d2dd96667aeba8887fd3cfca3..df28d738433f897d5c1e8bf1d628a4183dc91ac5 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.28 2006/03/05 15:59:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.29 2006/04/26 18:28:34 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -134,4 +134,6 @@ extern Datum btcostestimate(PG_FUNCTION_ARGS);
 extern Datum hashcostestimate(PG_FUNCTION_ARGS);
 extern Datum gistcostestimate(PG_FUNCTION_ARGS);
 
+extern Datum parentsel(PG_FUNCTION_ARGS);
+
 #endif   /* SELFUNCS_H */