Fix handling of clauses incompatible with extended statistics
authorTomas Vondra <tomas.vondra@postgresql.org>
Tue, 6 Apr 2021 14:12:37 +0000 (16:12 +0200)
committerTomas Vondra <tomas.vondra@postgresql.org>
Tue, 6 Apr 2021 14:56:06 +0000 (16:56 +0200)
Handling of incompatible clauses while applying extended statistics was
a bit confused - while handling a mix of compatible and incompatible
clauses it sometimes incorrectly treated the incompatible clauses as
compatible, resulting in a crash.

Fixed by reworking the code applying the selected statistics object to
make it easier to understand, and adding a proper compatibility check.

Reported-by: David Rowley
Discussion: https://postgr.es/m/CAApHDvpYT10-nkSp8xXe-nbO3jmoaRyRFHbzh-RWMfAJynqgpQ%40mail.gmail.com

src/backend/statistics/extended_stats.c
src/backend/statistics/mcv.c
src/test/regress/expected/stats_ext.out
src/test/regress/sql/stats_ext.sql

index dd3c84a91c0413fcfb4a1e008678cc959fa1a7f7..463d44a68a40f8138151d3169a2ebed23158749a 100644 (file)
@@ -1255,6 +1255,10 @@ choose_best_statistics(List *stats, char requiredkind,
                /*
                 * Collect attributes and expressions in remaining (unestimated)
                 * clauses fully covered by this statistic object.
+                *
+                * We know already estimated clauses have both clause_attnums and
+                * clause_exprs set to NULL. We leave the pointers NULL if already
+                * estimated, or we reset them to NULL after estimating the clause.
                 */
                for (i = 0; i < nclauses; i++)
                {
@@ -1758,39 +1762,65 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
                /* record which clauses are simple (single column or expression) */
                simple_clauses = NULL;
 
-               listidx = 0;
+               listidx = -1;
                foreach(l, clauses)
                {
+                       /* Increment the index before we decide if to skip the clause. */
+                       listidx++;
+
                        /*
-                        * If the clause is not already estimated and is compatible with
-                        * the selected statistics object (all attributes and expressions
-                        * covered), mark it as estimated and add it to the list to
-                        * estimate.
+                        * Ignore clauses from which we did not extract any attnums or
+                        * expressions (this needs to be consistent with what we do in
+                        * choose_best_statistics).
+                        *
+                        * This also eliminates already estimated clauses - both those
+                        * estimated before and during applying extended statistics.
+                        *
+                        * XXX This check is needed because both bms_is_subset and
+                        * stat_covers_expressions return true for empty attnums and
+                        * expressions.
                         */
-                       if (!bms_is_member(listidx, *estimatedclauses) &&
-                               bms_is_subset(list_attnums[listidx], stat->keys) &&
-                               stat_covers_expressions(stat, list_exprs[listidx], NULL))
-                       {
-                               /* record simple clauses (single column or expression) */
-                               if ((list_attnums[listidx] == NULL &&
-                                        list_length(list_exprs[listidx]) == 1) ||
-                                       (list_exprs[listidx] == NIL &&
-                                        bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
-                                       simple_clauses = bms_add_member(simple_clauses,
-                                                                                                       list_length(stat_clauses));
-
-                               /* add clause to list and mark as estimated */
-                               stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
-                               *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
-
-                               bms_free(list_attnums[listidx]);
-                               list_attnums[listidx] = NULL;
-
-                               list_free(list_exprs[listidx]);
-                               list_exprs[listidx] = NULL;
-                       }
+                       if (!list_attnums[listidx] && !list_exprs[listidx])
+                               continue;
 
-                       listidx++;
+                       /*
+                        * The clause was not estimated yet, and we've extracted either
+                        * attnums of expressions from it. Ignore it if it's not fully
+                        * covered by the chosen statistics.
+                        *
+                        * We need to check both attributes and expressions, and reject
+                        * if either is not covered.
+                        */
+                       if (!bms_is_subset(list_attnums[listidx], stat->keys) ||
+                               !stat_covers_expressions(stat, list_exprs[listidx], NULL))
+                               continue;
+
+                       /*
+                        * Now we know the clause is compatible (we have either atttnums
+                        * or expressions extracted from it), and was not estimated yet.
+                        */
+
+                       /* record simple clauses (single column or expression) */
+                       if ((list_attnums[listidx] == NULL &&
+                                list_length(list_exprs[listidx]) == 1) ||
+                               (list_exprs[listidx] == NIL &&
+                                bms_membership(list_attnums[listidx]) == BMS_SINGLETON))
+                               simple_clauses = bms_add_member(simple_clauses,
+                                                                                               list_length(stat_clauses));
+
+                       /* add clause to list and mark it as estimated */
+                       stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
+                       *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
+
+                       /*
+                        * Reset the pointers, so that choose_best_statistics knows this
+                        * clause was estimated and does not consider it again.
+                        */
+                       bms_free(list_attnums[listidx]);
+                       list_attnums[listidx] = NULL;
+
+                       list_free(list_exprs[listidx]);
+                       list_exprs[listidx] = NULL;
                }
 
                if (is_or)
index 2a00fb48483f477fb328b44c1f1b39fe34e6e11f..9ab3e81a91d7742cc83043e760d8bd6d3d8f6ab0 100644 (file)
@@ -1575,6 +1575,8 @@ mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
                           (idx <= bms_num_members(keys) + list_length(exprs)));
        }
 
+       Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
+
        return idx;
 }
 
@@ -1654,6 +1656,8 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
                        /* match the attribute/expression to a dimension of the statistic */
                        idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
 
+                       Assert((idx >= 0) && (idx < bms_num_members(keys) + list_length(exprs)));
+
                        /*
                         * Walk through the MCV items and evaluate the current clause. We
                         * can skip items that were already ruled out, and terminate if
index 679fd2d64d483ddde3579940a2bbebfeebb4ac5a..8c214d8dfc557a3c134cdfe14d12e021b622048e 100644 (file)
@@ -2938,6 +2938,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b
 (1 row)
 
 DROP TABLE expr_stats;
+-- test handling of a mix of compatible and incompatible expressions
+CREATE TABLE expr_stats_incompatible_test (
+    c0 double precision,
+    c1 boolean NOT NULL
+);
+CREATE STATISTICS expr_stat_comp_1 ON c0, c1 FROM expr_stats_incompatible_test;
+INSERT INTO expr_stats_incompatible_test VALUES (1234,false), (5678,true);
+ANALYZE expr_stats_incompatible_test;
+SELECT c0 FROM ONLY expr_stats_incompatible_test WHERE
+(
+  upper('x') LIKE ('x'||('[0,1]'::int4range))
+  AND
+  (c0 IN (0, 1) OR c1)
+);
+ c0 
+----
+(0 rows)
+
+DROP TABLE expr_stats_incompatible_test;
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
 -- the underlying table.
index 7e092571ca00bf7e2cf1d4935a10f6a0be0410da..e033080d4fb388c03019eb4e3c95b6093e11ef7b 100644 (file)
@@ -1470,6 +1470,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM expr_stats WHERE a = 0 AND (b
 
 DROP TABLE expr_stats;
 
+-- test handling of a mix of compatible and incompatible expressions
+CREATE TABLE expr_stats_incompatible_test (
+    c0 double precision,
+    c1 boolean NOT NULL
+);
+
+CREATE STATISTICS expr_stat_comp_1 ON c0, c1 FROM expr_stats_incompatible_test;
+
+INSERT INTO expr_stats_incompatible_test VALUES (1234,false), (5678,true);
+ANALYZE expr_stats_incompatible_test;
+
+SELECT c0 FROM ONLY expr_stats_incompatible_test WHERE
+(
+  upper('x') LIKE ('x'||('[0,1]'::int4range))
+  AND
+  (c0 IN (0, 1) OR c1)
+);
+
+DROP TABLE expr_stats_incompatible_test;
 
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in