Repair error noticed by Roberto Cornacchia: selectivity code
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 9 Sep 1999 02:36:04 +0000 (02:36 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 9 Sep 1999 02:36:04 +0000 (02:36 +0000)
was rejecting negative attnums as bogus, which of course they are not.
Add code to get_attdisbursion to produce a useful value for OID attribute,
since VACUUM does not store stats for system attributes.
Also, repair bug that's been in eqjoinsel for a long time: it was taking
the max of the two columns' disbursions, whereas it should use the min.

src/backend/optimizer/path/clausesel.c
src/backend/optimizer/util/clauses.c
src/backend/optimizer/util/plancat.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/cache/lsyscache.c

index 23998a54bff3151b2fe793cdfa927f8a7133929b..5264d50834205c4d0819b2a036c6ef54edc93cc7 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.25 1999/07/25 23:07:24 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.26 1999/09/09 02:35:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -230,24 +230,24 @@ compute_clause_selec(Query *root, Node *clause)
                                int                     flag;
 
                                get_relattval(clause, 0, &relidx, &attno, &constval, &flag);
-                               if (relidx <= 0 || attno <= 0)
+                               if (relidx && attno)
+                                       s1 = (Cost) restriction_selectivity(oprrest,
+                                                                                                               opno,
+                                                                                                               getrelid(relidx,
+                                                                                                                                root->rtable),
+                                                                                                               attno,
+                                                                                                               constval,
+                                                                                                               flag);
+                               else
                                {
                                        /*
-                                        * attno can be Invalid if the clause had a function in it,
+                                        * attno can be 0 if the clause had a function in it,
                                         * i.e.   WHERE myFunc(f) = 10
                                         *
                                         * XXX should be FIXED to use function selectivity
                                         */
                                        s1 = (Cost) (0.5);
                                }
-                               else
-                                       s1 = (Cost) restriction_selectivity(oprrest,
-                                                                                                               opno,
-                                                                                                               getrelid(relidx,
-                                                                                                                                root->rtable),
-                                                                                                               attno,
-                                                                                                               constval,
-                                                                                                               flag);
                        }
                }
                else
@@ -274,7 +274,8 @@ compute_clause_selec(Query *root, Node *clause)
                                                        attno2;
 
                                get_rels_atts(clause, &relid1, &attno1, &relid2, &attno2);
-                               if (relid1 > 0 && relid2 > 0 && attno1 > 0 && attno2 > 0)
+                               if (relid1 && relid2 && attno1 && attno2)
+
                                        s1 = (Cost) join_selectivity(oprjoin,
                                                                                                 opno,
                                                                                                 getrelid(relid1,
index 1ab09217c5928e32a8c56cd1a582d3399df58169..876a0dafb72cc14f6cd1eea89482600a78a184f0 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.50 1999/08/26 05:09:05 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.51 1999/09/09 02:35:53 tgl Exp $
  *
  * HISTORY
  *       AUTHOR                        DATE                    MAJOR EVENT
@@ -116,7 +116,10 @@ make_opclause(Oper *op, Var *leftop, Var *rightop)
  *
  * Returns the left operand of a clause of the form (op expr expr)
  *             or (op expr)
- * NB: it is assumed (for now) that all expr must be Var nodes
+ *
+ * NB: for historical reasons, the result is declared Var *, even
+ * though many callers can cope with results that are not Vars.
+ * The result really ought to be declared Expr * or Node *.
  */
 Var *
 get_leftop(Expr *clause)
@@ -549,8 +552,11 @@ NumRelids(Node *clause)
  *                     if the "something" is a constant, the value of the constant
  *                     flags indicating whether a constant was found, and on which side.
  *             Default values are returned if the expression is too complicated,
- *             specifically -1 for the relid and attno, 0 for the constant value.
- *             Note that InvalidAttrNumber is *not* -1, but 0.
+ *             specifically 0 for the relid and attno, 0 for the constant value.
+ *
+ *             Note that negative attno values are *not* invalid, but represent
+ *             system attributes such as OID.  It's sufficient to check for relid=0
+ *             to determine whether the routine succeeded.
  */
 void
 get_relattval(Node *clause,
@@ -610,8 +616,8 @@ get_relattval(Node *clause,
        {
                /* Duh, it's too complicated for me... */
 default_results:
-               *relid = -1;
-               *attno = -1;
+               *relid = 0;
+               *attno = 0;
                *constval = 0;
                *flag = 0;
                return;
@@ -663,7 +669,7 @@ static int is_single_func(Node *node)
  *             for a joinclause.
  *
  * If the clause is not of the form (var op var) or if any of the vars
- * refer to nested attributes, then -1's are returned.
+ * refer to nested attributes, then zeroes are returned.
  *
  */
 void
@@ -674,10 +680,10 @@ get_rels_atts(Node *clause,
                          AttrNumber *attno2)
 {
        /* set default values */
-       *relid1 = -1;
-       *attno1 = -1;
-       *relid2 = -1;
-       *attno2 = -1;
+       *relid1 = 0;
+       *attno1 = 0;
+       *relid2 = 0;
+       *attno2 = 0;
 
        if (is_opclause(clause))
        {
index a428e027f611dceb9f8d0e8f2043bf630b6523e5..9ca188bce62d42e8c7c8ac60a50400fdeaf71cd0 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.36 1999/07/25 23:07:26 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.37 1999/09/09 02:35:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -288,7 +288,7 @@ index_selectivity(Query *root,
 }
 
 /*
- * restriction_selectivity in lisp system.
+ * restriction_selectivity
  *
  *       NOTE: The routine is now merged with RestrictionClauseSelectivity
  *       as defined in plancat.c
@@ -298,7 +298,7 @@ index_selectivity(Query *root,
  * operator relation, by calling the function manager.
  *
  * XXX The assumption in the selectivity procedures is that if the
- *             relation OIDs or attribute numbers are -1, then the clause
+ *             relation OIDs or attribute numbers are 0, then the clause
  *             isn't of the form (op var const).
  */
 Cost
@@ -337,7 +337,7 @@ restriction_selectivity(Oid functionObjectId,
  *       information.
  *
  * XXX The assumption in the selectivity procedures is that if the
- *             relation OIDs or attribute numbers are -1, then the clause
+ *             relation OIDs or attribute numbers are 0, then the clause
  *             isn't of the form (op var var).
  */
 Cost
index 883da87a275bdc91d98a99e53a31078d4a020c6e..1b2d58c075ed487de5fa54f1560f196adbb6baac 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.39 1999/08/21 00:56:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.40 1999/09/09 02:35:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -31,7 +31,7 @@
 #include "utils/syscache.h"
 
 /* N is not a valid var/constant or relation id */
-#define NONVALUE(N)            ((N) == -1)
+#define NONVALUE(N)            ((N) == 0)
 
 /* are we looking at a functional index selectivity request? */
 #define FunctionalSelectivity(nIndKeys,attNum) ((attNum)==InvalidAttrNumber)
@@ -170,7 +170,9 @@ eqsel(Oid opid,
                else
                {
                        /* No VACUUM ANALYZE stats available, so make a guess using
-                        * the disbursion stat (if we have that, which is unlikely...)
+                        * the disbursion stat (if we have that, which is unlikely
+                        * for a normal attribute; but for a system attribute we may
+                        * be able to estimate it).
                         */
                        selec = get_attdisbursion(relid, attno, 0.01);
                }
@@ -366,7 +368,7 @@ eqjoinsel(Oid opid,
        float64         result;
        float64data num1,
                                num2,
-                               max;
+                               min;
 
        result = (float64) palloc(sizeof(float64data));
        if (NONVALUE(attno1) || NONVALUE(relid1) ||
@@ -376,11 +378,23 @@ eqjoinsel(Oid opid,
        {
                num1 = get_attdisbursion(relid1, attno1, 0.01);
                num2 = get_attdisbursion(relid2, attno2, 0.01);
-               max = (num1 > num2) ? num1 : num2;
-               if (max <= 0)
-                       *result = 1.0;
-               else
-                       *result = max;
+               /*
+                * The join selectivity cannot be more than num2, since each
+                * tuple in table 1 could match no more than num2 fraction of
+                * tuples in table 2 (and that's only if the table-1 tuple
+                * matches the most common value in table 2, so probably it's
+                * less).  By the same reasoning it is not more than num1.
+                * The min is therefore an upper bound.
+                *
+                * XXX can we make a better estimate here?  Using the nullfrac
+                * statistic might be helpful, for example.  Assuming the operator
+                * is strict (does not succeed for null inputs) then the selectivity
+                * couldn't be more than (1-nullfrac1)*(1-nullfrac2), which might
+                * be usefully small if there are many nulls.  How about applying
+                * the operator to the most common values?
+                */
+               min = (num1 < num2) ? num1 : num2;
+               *result = min;
        }
        return result;
 }
index 4f9cd3fefa71784cb7d3dcc14b38a8a04e7c657a..75994a31f27ca7a8ff80a39a4e5d702c0edaaae8 100644 (file)
@@ -6,7 +6,7 @@
  * Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.33 1999/08/16 02:06:25 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.34 1999/09/09 02:36:04 tgl Exp $
  *
  * NOTES
  *       Eventually, the index information should go through here, too.
@@ -223,6 +223,14 @@ get_attdisbursion(Oid relid, AttrNumber attnum, double min_estimate)
        if (disbursion < 0.0)           /* VACUUM thinks there are no duplicates */
                return 1.0 / (double) ntuples;
 
+       /*
+        * VACUUM ANALYZE does not compute disbursion for system attributes,
+        * but some of them can reasonably be assumed unique anyway.
+        */
+       if (attnum == ObjectIdAttributeNumber ||
+               attnum == SelfItemPointerAttributeNumber)
+               return 1.0 / (double) ntuples;
+
        /*
         * VACUUM ANALYZE has not been run for this table.
         * Produce an estimate = 1/numtuples.  This may produce