Prevent integer overflow when forming tuple width estimates.
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 19 Dec 2023 16:12:16 +0000 (11:12 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 19 Dec 2023 16:12:16 +0000 (11:12 -0500)
It's at least theoretically possible to overflow int32 when adding up
column width estimates to make a row width estimate.  (The bug example
isn't terribly convincing as a real use-case, but perhaps wide joins
would provide a more plausible route to trouble.)  This'd lead to
assertion failures or silly planner behavior.  To forestall it, make
the relevant functions compute their running sums in int64 arithmetic
and then clamp to int32 range at the end.  We can reasonably assume
that MaxAllocSize is a hard limit on actual tuple width, so clamping
to that is simply a correction for dubious input values, and there's
no need to go as far as widening width variables to int64 everywhere.

Per bug #18247 from RekGRpth.  There've been no reports of this issue
arising in practical cases, so I feel no need to back-patch.

Richard Guo and Tom Lane

Discussion: https://postgr.es/m/18247-11ac477f02954422@postgresql.org

src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/placeholder.c
src/backend/optimizer/util/plancat.c
src/backend/optimizer/util/relnode.c
src/include/optimizer/optimizer.h

index 5227346aeb1aa07e292a81ecb3a40cfd6ea93df8..22635d2927006504aacff22923b746b831893dfe 100644 (file)
@@ -218,6 +218,35 @@ clamp_row_est(double nrows)
    return nrows;
 }
 
+/*
+ * clamp_width_est
+ *     Force a tuple-width estimate to a sane value.
+ *
+ * The planner represents datatype width and tuple width estimates as int32.
+ * When summing column width estimates to create a tuple width estimate,
+ * it's possible to reach integer overflow in edge cases.  To ensure sane
+ * behavior, we form such sums in int64 arithmetic and then apply this routine
+ * to clamp the result to at most MaxAllocSize before narrowing to int32.
+ */
+int32
+clamp_width_est(int64 tuple_width)
+{
+   /*
+    * Anything more than MaxAllocSize is clearly bogus, since we could not
+    * create a tuple that large.  Compare in int64: were MaxAllocSize an
+    * unsigned 64-bit value, a negative input would convert to a huge
+    * unsigned number and be clamped here, hiding it from the Assert below.
+    */
+   if (tuple_width > (int64) MaxAllocSize)
+       return (int32) MaxAllocSize;
+
+   /*
+    * Unlike clamp_row_est, we just Assert that the value isn't negative,
+    * rather than masking such errors.
+    */
+   Assert(tuple_width >= 0);
+   return (int32) tuple_width;
+}
+
 /*
  * clamp_cardinality_to_long
  *     Cast a Cardinality value to a sane long value.
@@ -6101,7 +6130,7 @@ static void
 set_rel_width(PlannerInfo *root, RelOptInfo *rel)
 {
    Oid         reloid = planner_rt_fetch(rel->relid, root)->relid;
-   int32       tuple_width = 0;
+   int64       tuple_width = 0;
    bool        have_wholerow_var = false;
    ListCell   *lc;
 
@@ -6213,7 +6242,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
     */
    if (have_wholerow_var)
    {
-       int32       wholerow_width = MAXALIGN(SizeofHeapTupleHeader);
+       int64       wholerow_width = MAXALIGN(SizeofHeapTupleHeader);
 
        if (reloid != InvalidOid)
        {
@@ -6230,7 +6259,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
                wholerow_width += rel->attr_widths[i - rel->min_attr];
        }
 
-       rel->attr_widths[0 - rel->min_attr] = wholerow_width;
+       rel->attr_widths[0 - rel->min_attr] = clamp_width_est(wholerow_width);
 
        /*
         * Include the whole-row Var as part of the output tuple.  Yes, that
@@ -6239,8 +6268,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
        tuple_width += wholerow_width;
    }
 
-   Assert(tuple_width >= 0);
-   rel->reltarget->width = tuple_width;
+   rel->reltarget->width = clamp_width_est(tuple_width);
 }
 
 /*
@@ -6258,7 +6286,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
 PathTarget *
 set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
 {
-   int32       tuple_width = 0;
+   int64       tuple_width = 0;
    ListCell   *lc;
 
    /* Vars are assumed to have cost zero, but other exprs do not */
@@ -6282,8 +6310,7 @@ set_pathtarget_cost_width(PlannerInfo *root, PathTarget *target)
        }
    }
 
-   Assert(tuple_width >= 0);
-   target->width = tuple_width;
+   target->width = clamp_width_est(tuple_width);
 
    return target;
 }
index a8cea5efe1415bdeb231a967cee6d0b36a933218..6f45efde21d487df6959406ec58c595f9329b83e 100644 (file)
@@ -4610,6 +4610,7 @@ create_one_window_path(PlannerInfo *root,
             * Note: a WindowFunc adds nothing to the target's eval costs; but
             * we do need to account for the increase in tlist width.
             */
+           int64       tuple_width = window_target->width;
            ListCell   *lc2;
 
            window_target = copy_pathtarget(window_target);
@@ -4618,8 +4619,9 @@ create_one_window_path(PlannerInfo *root,
                WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
 
                add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
-               window_target->width += get_typavgwidth(wfunc->wintype, -1);
+               tuple_width += get_typavgwidth(wfunc->wintype, -1);
            }
+           window_target->width = clamp_width_est(tuple_width);
        }
        else
        {
index b1723578e6f9dbcc772f47c81f1775c735f329a1..66b5e2b1e7665f82f0b9072dabffc9daae438f13 100644 (file)
@@ -375,6 +375,7 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
                            SpecialJoinInfo *sjinfo)
 {
    Relids      relids = joinrel->relids;
+   int64       tuple_width = joinrel->reltarget->width;
    ListCell   *lc;
 
    foreach(lc, root->placeholder_list)
@@ -419,7 +420,7 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
                    cost_qual_eval_node(&cost, (Node *) phv->phexpr, root);
                    joinrel->reltarget->cost.startup += cost.startup;
                    joinrel->reltarget->cost.per_tuple += cost.per_tuple;
-                   joinrel->reltarget->width += phinfo->ph_width;
+                   tuple_width += phinfo->ph_width;
                }
            }
 
@@ -443,6 +444,8 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
                                phinfo->ph_lateral);
        }
    }
+
+   joinrel->reltarget->width = clamp_width_est(tuple_width);
 }
 
 /*
index 7159c775fbd6612dd5e338d5a5007e286e84a96f..0e35b9d0ab9f1ca61c53c158383788efe51f257e 100644 (file)
@@ -1137,7 +1137,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
 int32
 get_rel_data_width(Relation rel, int32 *attr_widths)
 {
-   int32       tuple_width = 0;
+   int64       tuple_width = 0;
    int         i;
 
    for (i = 1; i <= RelationGetNumberOfAttributes(rel); i++)
@@ -1167,7 +1167,7 @@ get_rel_data_width(Relation rel, int32 *attr_widths)
        tuple_width += item_width;
    }
 
-   return tuple_width;
+   return clamp_width_est(tuple_width);
 }
 
 /*
index 5d83f60eb9ad59a51d29d5d22f6166bcdea9acc0..9dfeb4ffd4bc3696caa899bb97206da43d1effec 100644 (file)
@@ -22,6 +22,7 @@
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/inherit.h"
+#include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/placeholder.h"
@@ -1092,6 +1093,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
                    bool can_null)
 {
    Relids      relids = joinrel->relids;
+   int64       tuple_width = joinrel->reltarget->width;
    ListCell   *vars;
    ListCell   *lc;
 
@@ -1144,7 +1146,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
                joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs,
                                                    phv);
                /* Bubbling up the precomputed result has cost zero */
-               joinrel->reltarget->width += phinfo->ph_width;
+               tuple_width += phinfo->ph_width;
            }
            continue;
        }
@@ -1165,7 +1167,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
                list_nth(root->row_identity_vars, var->varattno - 1);
 
            /* Update reltarget width estimate from RowIdentityVarInfo */
-           joinrel->reltarget->width += ridinfo->rowidwidth;
+           tuple_width += ridinfo->rowidwidth;
        }
        else
        {
@@ -1181,7 +1183,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
                continue;       /* nope, skip it */
 
            /* Update reltarget width estimate from baserel's attr_widths */
-           joinrel->reltarget->width += baserel->attr_widths[ndx];
+           tuple_width += baserel->attr_widths[ndx];
        }
 
        /*
@@ -1221,6 +1223,8 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
 
        /* Vars have cost zero, so no need to adjust reltarget->cost */
    }
+
+   joinrel->reltarget->width = clamp_width_est(tuple_width);
 }
 
 /*
index 6e8b81c51d5e19488e1e87693379f28cfcf0ce5b..da50044bf148f901eb116b162e5662b60b1894eb 100644 (file)
@@ -90,6 +90,7 @@ extern PGDLLIMPORT double recursive_worktable_factor;
 extern PGDLLIMPORT int effective_cache_size;
 
 extern double clamp_row_est(double nrows);
+extern int32 clamp_width_est(int64 tuple_width);
 extern long clamp_cardinality_to_long(Cardinality x);
 
 /* in path/indxpath.c: */