const RangeBound *constbound,
const RangeBound *hist, int hist_nvalues,
bool equal);
-static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
- const RangeBound *hist, int hist_length, bool equal);
+static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
+ const RangeBound *hist, int hist_length, bool equal);
static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
const RangeBound *hist1, const RangeBound *hist2);
static float8 get_len_position(double value, double hist1, double hist2);
ATTSTATSSLOT_VALUES)))
return -1.0;
+ /* check that it's a histogram, not just a dummy entry */
+ if (hslot.nvalues < 2)
+ {
+ free_attstatsslot(&hslot);
+ return -1.0;
+ }
+
/*
* Convert histogram of ranges into histograms of its lower and upper
* bounds.
/*
* Both bounds are finite. Assuming the subtype's comparison function
* works sanely, the value must be finite, too, because it lies
- * somewhere between the bounds. If it doesn't, just return something.
+ * somewhere between the bounds. If it doesn't, arbitrarily return
+ * 0.5.
*/
if (value->infinite)
return 0.5;
return 0.5;
/* Calculate relative position using subdiff function. */
- bin_width = DatumGetFloat8(FunctionCall2Coll(
- &typcache->rng_subdiff_finfo,
+ bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
typcache->rng_collation,
hist2->val,
hist1->val));
- if (bin_width <= 0.0)
- return 0.5; /* zero width bin */
+ if (isnan(bin_width) || bin_width <= 0.0)
+ return 0.5; /* punt for NaN or zero-width bin */
- position = DatumGetFloat8(FunctionCall2Coll(
- &typcache->rng_subdiff_finfo,
+ position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
typcache->rng_collation,
value->val,
hist1->val))
/ bin_width;
+ if (isnan(position))
+ return 0.5; /* punt for NaN from subdiff, Inf/Inf, etc */
+
/* Relative position must be in [0,1] range */
position = Max(position, 0.0);
position = Min(position, 1.0);
if (!bound1->infinite && !bound2->infinite)
{
/*
- * No bounds are infinite, use subdiff function or return default
+ * Neither bound is infinite, use subdiff function or return default
* value of 1.0 if no subdiff is available.
*/
if (has_subdiff)
- return
- DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
- typcache->rng_collation,
- bound2->val,
- bound1->val));
+ {
+ float8 res;
+
+ res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+ typcache->rng_collation,
+ bound2->val,
+ bound1->val));
+ /* Reject possible NaN result, also negative result */
+ if (isnan(res) || res < 0.0)
+ return 1.0;
+ else
+ return res;
+ }
else
return 1.0;
}
}
else
{
- /* One bound is infinite, another is not */
+ /* One bound is infinite, the other is not */
return get_float8_infinity();
}
}
upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
false);
+ /*
+ * If the upper bound value is below the histogram's lower limit, there
+ * are no matches.
+ */
+ if (upper_index < 0)
+ return 0.0;
+
+ /*
+ * If the upper bound value is at or beyond the histogram's upper limit,
+ * start our loop at the last actual bin, as though the upper bound were
+ * within that bin; get_position will clamp its result to 1.0 anyway.
+ * (This corresponds to assuming that the data population above the
+ * histogram's upper limit is empty, exactly like what we just assumed for
+ * the lower limit.)
+ */
+ upper_index = Min(upper_index, hist_nvalues - 2);
+
/*
* Calculate upper_bin_width, ie. the fraction of the (upper_index,
* upper_index + 1) bin which is greater than upper bound of query range
* using linear interpolation of subdiff function.
*/
- if (upper_index >= 0 && upper_index < hist_nvalues - 1)
- upper_bin_width = get_position(typcache, upper,
- &hist_lower[upper_index],
- &hist_lower[upper_index + 1]);
- else
- upper_bin_width = 0.0;
+ upper_bin_width = get_position(typcache, upper,
+ &hist_lower[upper_index],
+ &hist_lower[upper_index + 1]);
/*
* In the loop, dist and prev_dist are the distance of the "current" bin's
* of ranges that contain the constant lower and upper bounds. This uses
* the histograms of range lower bounds and range lengths, on the assumption
* that the range lengths are independent of the lower bounds.
- *
- * Note, this is "var @> const", ie. estimate the fraction of ranges that
- * contain the constant lower and upper bounds.
*/
static double
calc_hist_selectivity_contains(TypeCacheEntry *typcache,
lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
true);
+ /*
+ * If the lower bound value is below the histogram's lower limit, there
+ * are no matches.
+ */
+ if (lower_index < 0)
+ return 0.0;
+
+ /*
+ * If the lower bound value is at or beyond the histogram's upper limit,
+ * start our loop at the last actual bin, as though the upper bound were
+ * within that bin; get_position will clamp its result to 1.0 anyway.
+ * (This corresponds to assuming that the data population above the
+ * histogram's upper limit is empty, exactly like what we just assumed for
+ * the lower limit.)
+ */
+ lower_index = Min(lower_index, hist_nvalues - 2);
+
/*
* Calculate lower_bin_width, ie. the fraction of the of (lower_index,
* lower_index + 1) bin which is greater than lower bound of query range
* using linear interpolation of subdiff function.
*/
- if (lower_index >= 0 && lower_index < hist_nvalues - 1)
- lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
- &hist_lower[lower_index + 1]);
- else
- lower_bin_width = 0.0;
+ lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
+ &hist_lower[lower_index + 1]);
/*
* Loop through all the lower bound bins, smaller than the query lower