Use multi-variate MCV lists to estimate ScalarArrayOpExpr
authorTomas Vondra <tomas.vondra@postgresql.org>
Sat, 14 Mar 2020 13:56:37 +0000 (14:56 +0100)
committerTomas Vondra <tomas.vondra@postgresql.org>
Sat, 14 Mar 2020 15:13:00 +0000 (16:13 +0100)
Commit 8f321bd16c added support for estimating ScalarArrayOpExpr clauses
(IN/ANY) clauses using functional dependencies. There's no good reason
not to support estimation of these clauses using multi-variate MCV lists
too, so this commits implements that. That makes the behavior consistent
and MCV lists can estimate all variants (ANY/ALL, inequalities, ...).

Author: Tomas Vondra
Review: Dean Rasheed
Discussion: https://www.postgresql.org/message-id/flat/13902317.Eha0YfKkKy%40pierred-pdoc

src/backend/statistics/extended_stats.c
src/backend/statistics/mcv.c
src/include/statistics/extended_stats_internal.h
src/test/regress/expected/stats_ext.out
src/test/regress/sql/stats_ext.sql

index 81864ce03df0cc1221987eabce8eb158c3aeb6f6..c3f0da4e2335e4957b22daaa1c12961997a45367 100644 (file)
@@ -994,7 +994,63 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
                        return false;
 
                /* Check if the expression the right shape (one Var, one Const) */
-               if (!examine_opclause_expression(expr, &var, NULL, NULL))
+               if (!examine_clause_args(expr->args, &var, NULL, NULL))
+                       return false;
+
+               /*
+                * If it's not one of the supported operators ("=", "<", ">", etc.),
+                * just ignore the clause, as it's not compatible with MCV lists.
+                *
+                * This uses the function for estimating selectivity, not the operator
+                * directly (a bit awkward, but well ...).
+                */
+               switch (get_oprrest(expr->opno))
+               {
+                       case F_EQSEL:
+                       case F_NEQSEL:
+                       case F_SCALARLTSEL:
+                       case F_SCALARLESEL:
+                       case F_SCALARGTSEL:
+                       case F_SCALARGESEL:
+                               /* supported, will continue with inspection of the Var */
+                               break;
+
+                       default:
+                               /* other estimators are considered unknown/unsupported */
+                               return false;
+               }
+
+               /*
+                * If there are any securityQuals on the RTE from security barrier
+                * views or RLS policies, then the user may not have access to all the
+                * table's data, and we must check that the operator is leak-proof.
+                *
+                * If the operator is leaky, then we must ignore this clause for the
+                * purposes of estimating with MCV lists, otherwise the operator might
+                * reveal values from the MCV list that the user doesn't have
+                * permission to see.
+                */
+               if (rte->securityQuals != NIL &&
+                       !get_func_leakproof(get_opcode(expr->opno)))
+                       return false;
+
+               return statext_is_compatible_clause_internal(root, (Node *) var,
+                                                                                                        relid, attnums);
+       }
+
+       /* Var IN Array */
+       if (IsA(clause, ScalarArrayOpExpr))
+       {
+               RangeTblEntry      *rte = root->simple_rte_array[relid];
+               ScalarArrayOpExpr  *expr = (ScalarArrayOpExpr *) clause;
+               Var                *var;
+
+               /* Only expressions with two arguments are considered compatible. */
+               if (list_length(expr->args) != 2)
+                       return false;
+
+               /* Check if the expression the right shape (one Var, one Const) */
+               if (!examine_clause_args(expr->args, &var, NULL, NULL))
                        return false;
 
                /*
@@ -1396,7 +1452,7 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
  * on which side of the operator we found the Var node.
  */
 bool
-examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp)
+examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp)
 {
        Var        *var;
        Const  *cst;
@@ -1405,10 +1461,10 @@ examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonl
                   *rightop;
 
        /* enforced by statext_is_compatible_clause_internal */
-       Assert(list_length(expr->args) == 2);
+       Assert(list_length(args) == 2);
 
-       leftop = linitial(expr->args);
-       rightop = lsecond(expr->args);
+       leftop = linitial(args);
+       rightop = lsecond(args);
 
        /* strip RelabelType from either side of the expression */
        if (IsA(leftop, RelabelType))
index 87e232fdd4e71acfcf65a87ac1617e9f80592a9e..3147d8fedc623c0ba376846ff307a39b74e7bc3d 100644 (file)
@@ -1579,7 +1579,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
                        OpExpr     *expr = (OpExpr *) clause;
                        FmgrInfo        opproc;
 
-                       /* valid only after examine_opclause_expression returns true */
+                       /* valid only after examine_clause_args returns true */
                        Var                *var;
                        Const      *cst;
                        bool            varonleft;
@@ -1587,7 +1587,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
                        fmgr_info(get_opcode(expr->opno), &opproc);
 
                        /* extract the var and const from the expression */
-                       if (examine_opclause_expression(expr, &var, &cst, &varonleft))
+                       if (examine_clause_args(expr->args, &var, &cst, &varonleft))
                        {
                                int                     idx;
 
@@ -1652,6 +1652,113 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
                                }
                        }
                }
+               else if (IsA(clause, ScalarArrayOpExpr))
+               {
+                       ScalarArrayOpExpr   *expr = (ScalarArrayOpExpr *) clause;
+                       FmgrInfo        opproc;
+
+                       /* valid only after examine_clause_args returns true */
+                       Var                *var;
+                       Const      *cst;
+                       bool            varonleft;
+
+                       fmgr_info(get_opcode(expr->opno), &opproc);
+
+                       /* extract the var and const from the expression */
+                       if (examine_clause_args(expr->args, &var, &cst, &varonleft))
+                       {
+                               int                     idx;
+
+                               ArrayType  *arrayval;
+                               int16           elmlen;
+                               bool            elmbyval;
+                               char            elmalign;
+                               int                     num_elems;
+                               Datum      *elem_values;
+                               bool       *elem_nulls;
+
+                               /* ScalarArrayOpExpr has the Var always on the left */
+                               Assert(varonleft);
+
+                               if (!cst->constisnull)
+                               {
+                                       arrayval = DatumGetArrayTypeP(cst->constvalue);
+                                       get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+                                                                                &elmlen, &elmbyval, &elmalign);
+                                       deconstruct_array(arrayval,
+                                                                         ARR_ELEMTYPE(arrayval),
+                                                                         elmlen, elmbyval, elmalign,
+                                                                         &elem_values, &elem_nulls, &num_elems);
+                               }
+
+                               /* match the attribute to a dimension of the statistic */
+                               idx = bms_member_index(keys, var->varattno);
+
+                               /*
+                                * Walk through the MCV items and evaluate the current clause.
+                                * We can skip items that were already ruled out, and
+                                * terminate if there are no remaining MCV items that might
+                                * possibly match.
+                                */
+                               for (i = 0; i < mcvlist->nitems; i++)
+                               {
+                                       int                     j;
+                                       bool            match = (expr->useOr ? false : true);
+                                       MCVItem    *item = &mcvlist->items[i];
+
+                                       /*
+                                        * When the MCV item or the Const value is NULL we can treat
+                                        * this as a mismatch. We must not call the operator because
+                                        * of strictness.
+                                        */
+                                       if (item->isnull[idx] || cst->constisnull)
+                                       {
+                                               matches[i] = RESULT_MERGE(matches[i], is_or, false);
+                                               continue;
+                                       }
+
+                                       /*
+                                        * Skip MCV items that can't change result in the bitmap.
+                                        * Once the value gets false for AND-lists, or true for
+                                        * OR-lists, we don't need to look at more clauses.
+                                        */
+                                       if (RESULT_IS_FINAL(matches[i], is_or))
+                                               continue;
+
+                                       for (j = 0; j < num_elems; j++)
+                                       {
+                                               Datum   elem_value = elem_values[j];
+                                               bool    elem_isnull = elem_nulls[j];
+                                               bool    elem_match;
+
+                                               /* NULL values always evaluate as not matching. */
+                                               if (elem_isnull)
+                                               {
+                                                       match = RESULT_MERGE(match, expr->useOr, false);
+                                                       continue;
+                                               }
+
+                                               /*
+                                                * Stop evaluating the array elements once we reach
+                                                * match value that can't change - ALL() is the same
+                                                * as AND-list, ANY() is the same as OR-list.
+                                                */
+                                               if (RESULT_IS_FINAL(match, expr->useOr))
+                                                       break;
+
+                                               elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
+                                                                                                                                       var->varcollid,
+                                                                                                                                       item->values[idx],
+                                                                                                                                       elem_value));
+
+                                               match = RESULT_MERGE(match, expr->useOr, elem_match);
+                                       }
+
+                                       /* update the match bitmap with the result */
+                                       matches[i] = RESULT_MERGE(matches[i], is_or, match);
+                               }
+                       }
+               }
                else if (IsA(clause, NullTest))
                {
                        NullTest   *expr = (NullTest *) clause;
index b512ee908a494e7308338b04b787031fd3ce5635..2b14ab238cbabe6c777c549364a28af0690d4c16 100644 (file)
@@ -96,8 +96,8 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
                                                                        TupleDesc tdesc, MultiSortSupport mss,
                                                                        int numattrs, AttrNumber *attnums);
 
-extern bool examine_opclause_expression(OpExpr *expr, Var **varp,
-                                                                               Const **cstp, bool *varonleftp);
+extern bool examine_clause_args(List *args, Var **varp,
+                                                               Const **cstp, bool *varonleftp);
 
 extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
                                                                                          StatisticExtInfo *stat,
index 6a628f56809b77fac53ffbeeb09b73129c783eb6..9fa659c71d13d006b6bad102a06b68d7a72d424b 100644 (file)
@@ -827,6 +827,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
        343 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
+ estimated | actual 
+-----------+--------
+         8 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
+ estimated | actual 
+-----------+--------
+        26 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+        10 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+         1 |    100
+(1 row)
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
 ANALYZE mcv_lists;
@@ -872,6 +902,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
        200 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
+ estimated | actual 
+-----------+--------
+       150 |    150
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
+ estimated | actual 
+-----------+--------
+       100 |    100
+(1 row)
+
 -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
  estimated | actual 
index 3de2be500a820d9d65f2711e0799bc15b493693d..0ece39a27979d8d4c826b1bf6b4d09bcbdc911a8 100644 (file)
@@ -461,6 +461,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
 
@@ -480,6 +490,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])');
+
 -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');