static Selectivity prefix_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid eqopr, Oid ltopr, Oid geopr,
+ Oid collation,
Const *prefixcon);
static Selectivity like_selectivity(const char *patt, int pattlen,
bool case_insensitive);
* something binary-compatible but different.) We can use it to identify
* the comparison operators and the required type of the comparison
* constant, much as in match_pattern_prefix().
- *
- * NOTE: this logic does not consider collations. Ideally we'd force use
- * of "C" collation, but since ANALYZE only generates statistics for the
- * column's specified collation, we have little choice but to use those.
- * But our results are so approximate anyway that it probably hardly
- * matters.
*/
vartype = vardata.vartype;
/*
* Pattern specifies an exact match, so estimate as for '='
*/
- result = var_eq_const(&vardata, eqopr, prefix->constvalue,
+ result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
false, true, false);
}
else
opfuncid = get_opcode(oprid);
fmgr_info(opfuncid, &opproc);
- selec = histogram_selectivity(&vardata, &opproc, constval, true,
+ selec = histogram_selectivity(&vardata, &opproc, collation,
+ constval, true,
10, 1, &hist_size);
/* If not at least 100 entries, use the heuristic method */
if (pstatus == Pattern_Prefix_Partial)
prefixsel = prefix_selectivity(root, &vardata,
eqopr, ltopr, geopr,
+ collation,
prefix);
else
prefixsel = 1.0;
* directly to the result selectivity. Also add up the total fraction
* represented by MCV entries.
*/
- mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
+ mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
+ constval, true,
&sumcommon);
/*
* population represented by the histogram --- the caller must fold this
* together with info about MCVs and NULLs.
*
- * We use the specified btree comparison operators to do the estimation.
+ * We use the given comparison operators and collation to do the estimation.
* The given variable and Const must be of the associated datatype(s).
*
* XXX Note: we make use of the upper bound to estimate operator selectivity
static Selectivity
prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
Oid eqopr, Oid ltopr, Oid geopr,
+ Oid collation,
Const *prefixcon)
{
Selectivity prefixsel;
FmgrInfo opproc;
- AttStatsSlot sslot;
Const *greaterstrcon;
Selectivity eq_sel;
prefixsel = ineq_histogram_selectivity(root, vardata,
&opproc, true, true,
+ collation,
prefixcon->constvalue,
prefixcon->consttype);
return DEFAULT_MATCH_SEL;
}
- /*-------
- * If we can create a string larger than the prefix, say
- * "x < greaterstr". We try to generate the string referencing the
- * collation of the var's statistics, but if that's not available,
- * use DEFAULT_COLLATION_OID.
- *-------
+ /*
+ * If we can create a string larger than the prefix, say "x < greaterstr".
*/
- if (HeapTupleIsValid(vardata->statsTuple) &&
- get_attstatsslot(&sslot, vardata->statsTuple,
- STATISTIC_KIND_HISTOGRAM, InvalidOid, 0))
- /* sslot.stacoll is set up */ ;
- else
- sslot.stacoll = DEFAULT_COLLATION_OID;
fmgr_info(get_opcode(ltopr), &opproc);
- greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll);
+ greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
if (greaterstrcon)
{
Selectivity topsel;
topsel = ineq_histogram_selectivity(root, vardata,
&opproc, false, false,
+ collation,
greaterstrcon->constvalue,
greaterstrcon->consttype);
* probably off the end of the histogram, and thus we probably got a very
* small estimate from the >= condition; so we still need to clamp.
*/
- eq_sel = var_eq_const(vardata, eqopr, prefixcon->constvalue,
+ eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
false, true, false);
prefixsel = Max(prefixsel, eq_sel);
* (if any) is passed using the standard fmgr mechanism, so that the estimator
* function can fetch it with PG_GET_COLLATION(). Note, however, that all
* statistics in pg_statistic are currently built using the relevant column's
- * collation. Thus, in most cases where we are looking at statistics, we
- * should ignore the operator collation and use the stats entry's collation.
- * We expect that the error induced by doing this is usually not large enough
- * to justify complicating matters. In any case, doing otherwise would yield
- * entirely garbage results for ordered stats data such as histograms.
+ * collation.
*----------
*/
get_index_stats_hook_type get_index_stats_hook = NULL;
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
-static double eqjoinsel_inner(Oid opfuncoid,
+static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
bool have_mcvs1, bool have_mcvs2);
-static double eqjoinsel_semi(Oid opfuncoid,
+static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
static void examine_simple_variable(PlannerInfo *root, Var *var,
VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
- Oid sortop, Datum *min, Datum *max);
+ Oid sortop, Oid collation,
+ Datum *min, Datum *max);
static bool get_actual_variable_range(PlannerInfo *root,
VariableStatData *vardata,
- Oid sortop,
+ Oid sortop, Oid collation,
Datum *min, Datum *max);
static bool get_actual_variable_endpoint(Relation heapRel,
Relation indexRel,
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
VariableStatData vardata;
Node *other;
bool varonleft;
* in the query.)
*/
if (IsA(other, Const))
- selec = var_eq_const(&vardata, operator,
+ selec = var_eq_const(&vardata, operator, collation,
((Const *) other)->constvalue,
((Const *) other)->constisnull,
varonleft, negate);
else
- selec = var_eq_non_const(&vardata, operator, other,
+ selec = var_eq_non_const(&vardata, operator, collation, other,
varonleft, negate);
ReleaseVariableStats(vardata);
* This is exported so that some other estimation functions can use it.
*/
double
-var_eq_const(VariableStatData *vardata, Oid operator,
+var_eq_const(VariableStatData *vardata, Oid operator, Oid collation,
Datum constval, bool constisnull,
bool varonleft, bool negate)
{
* eqproc returns NULL, though really equality functions should
* never do that.
*/
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, sslot.stacoll,
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
* This is exported so that some other estimation functions can use it.
*/
double
-var_eq_non_const(VariableStatData *vardata, Oid operator,
+var_eq_non_const(VariableStatData *vardata, Oid operator, Oid collation,
Node *other,
bool varonleft, bool negate)
{
*/
static double
scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
+ Oid collation,
VariableStatData *vardata, Datum constval, Oid consttype)
{
Form_pg_statistic stats;
* to the result selectivity. Also add up the total fraction represented
* by MCV entries.
*/
- mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
+ mcv_selec = mcv_selectivity(vardata, &opproc, collation, constval, true,
&sumcommon);
/*
*/
hist_selec = ineq_histogram_selectivity(root, vardata,
&opproc, isgt, iseq,
+ collation,
constval, consttype);
/*
* if there is no MCV list.
*/
double
-mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
+mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation,
Datum constval, bool varonleft,
double *sumcommonp)
{
* operators that can return NULL. A small side benefit is to not
* need to re-initialize the fcinfo struct from scratch each time.
*/
- InitFunctionCallInfoData(*fcinfo, opproc, 2, sslot.stacoll,
+ InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
* prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
*/
double
-histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
+histogram_selectivity(VariableStatData *vardata,
+ FmgrInfo *opproc, Oid collation,
Datum constval, bool varonleft,
int min_hist_size, int n_skip,
int *hist_size)
* is to not need to re-initialize the fcinfo struct from scratch
* each time.
*/
- InitFunctionCallInfoData(*fcinfo, opproc, 2, sslot.stacoll,
+ InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
* Otherwise, fall back to the default selectivity provided by the caller.
*/
double
-generic_restriction_selectivity(PlannerInfo *root, Oid oproid,
+generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
List *args, int varRelid,
double default_selectivity)
{
/*
* Calculate the selectivity for the column's most common values.
*/
- mcvsel = mcv_selectivity(&vardata, &opproc, constval, varonleft,
+ mcvsel = mcv_selectivity(&vardata, &opproc, collation,
+ constval, varonleft,
&mcvsum);
/*
* population. Otherwise use the default selectivity for the non-MCV
* population.
*/
- selec = histogram_selectivity(&vardata, &opproc,
+ selec = histogram_selectivity(&vardata, &opproc, collation,
constval, varonleft,
10, 1, &hist_size);
if (selec < 0)
ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt, bool iseq,
+ Oid collation,
Datum constval, Oid consttype)
{
double hist_selec;
* column type. However, to make that work we will need to figure out
* which staop to search for --- it's not necessarily the one we have at
* hand! (For example, we might have a '<=' operator rather than the '<'
- * operator that will appear in staop.) For now, assume that whatever
- * appears in pg_statistic is sorted the same way our operator sorts, or
- * the reverse way if isgt is true.
+ * operator that will appear in staop.) The collation might not agree
+ * either. For now, just assume that whatever appears in pg_statistic is
+ * sorted the same way our operator sorts, or the reverse way if isgt is
+ * true. This could result in a bogus estimate, but it still seems better
+ * than falling back to the default estimate.
*/
if (HeapTupleIsValid(vardata->statsTuple) &&
statistic_proc_security_check(vardata, opproc->fn_oid) &&
have_end = get_actual_variable_range(root,
vardata,
sslot.staop,
+ collation,
&sslot.values[0],
&sslot.values[1]);
have_end = get_actual_variable_range(root,
vardata,
sslot.staop,
+ collation,
&sslot.values[0],
NULL);
else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
have_end = get_actual_variable_range(root,
vardata,
sslot.staop,
+ collation,
NULL,
&sslot.values[probe]);
ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
- sslot.stacoll,
+ collation,
sslot.values[probe],
constval));
if (isgt)
* values to a uniform comparison scale, and do a linear
* interpolation within this bin.
*/
- if (convert_to_scalar(constval, consttype, sslot.stacoll,
+ if (convert_to_scalar(constval, consttype, collation,
&val,
sslot.values[i - 1], sslot.values[i],
vardata->vartype,
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
VariableStatData vardata;
Node *other;
bool varonleft;
}
/* The rest of the work is done by scalarineqsel(). */
- selec = scalarineqsel(root, operator, isgt, iseq,
+ selec = scalarineqsel(root, operator, isgt, iseq, collation,
&vardata, constval, consttype);
ReleaseVariableStats(vardata);
* A boolean variable V is equivalent to the clause V = 't', so we
* compute the selectivity as if that is what we have.
*/
- selec = var_eq_const(&vardata, BooleanEqualOperator,
+ selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
BoolGetDatum(true), false, true, false);
}
else
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
#endif
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+ Oid collation = PG_GET_COLLATION();
double selec;
double selec_inner;
VariableStatData vardata1;
}
/* We need to compute the inner-join selectivity in all cases */
- selec_inner = eqjoinsel_inner(opfuncoid,
+ selec_inner = eqjoinsel_inner(opfuncoid, collation,
&vardata1, &vardata2,
nd1, nd2,
isdefault1, isdefault2,
inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
if (!join_is_reversed)
- selec = eqjoinsel_semi(opfuncoid,
+ selec = eqjoinsel_semi(opfuncoid, collation,
&vardata1, &vardata2,
nd1, nd2,
isdefault1, isdefault2,
Oid commop = get_commutator(operator);
Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
- selec = eqjoinsel_semi(commopfuncoid,
+ selec = eqjoinsel_semi(commopfuncoid, collation,
&vardata2, &vardata1,
nd2, nd1,
isdefault2, isdefault1,
* that it's worth trying to distinguish them here.
*/
static double
-eqjoinsel_inner(Oid opfuncoid,
+eqjoinsel_inner(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
* returns NULL, though really equality functions should never do
* that.
*/
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, sslot1->stacoll,
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
* Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
*/
static double
-eqjoinsel_semi(Oid opfuncoid,
+eqjoinsel_semi(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
* returns NULL, though really equality functions should never do
* that.
*/
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, sslot1->stacoll,
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
Oid op_lefttype;
Oid op_righttype;
Oid opno,
+ collation,
lsortop,
rsortop,
lstatop,
if (!is_opclause(clause))
return; /* shouldn't happen */
opno = ((OpExpr *) clause)->opno;
+ collation = ((OpExpr *) clause)->inputcollid;
left = get_leftop((Expr *) clause);
right = get_rightop((Expr *) clause);
if (!right)
/* Try to get ranges of both inputs */
if (!isgt)
{
- if (!get_variable_range(root, &leftvar, lstatop,
+ if (!get_variable_range(root, &leftvar, lstatop, collation,
&leftmin, &leftmax))
goto fail; /* no range available from stats */
- if (!get_variable_range(root, &rightvar, rstatop,
+ if (!get_variable_range(root, &rightvar, rstatop, collation,
&rightmin, &rightmax))
goto fail; /* no range available from stats */
}
else
{
/* need to swap the max and min */
- if (!get_variable_range(root, &leftvar, lstatop,
+ if (!get_variable_range(root, &leftvar, lstatop, collation,
&leftmax, &leftmin))
goto fail; /* no range available from stats */
- if (!get_variable_range(root, &rightvar, rstatop,
+ if (!get_variable_range(root, &rightvar, rstatop, collation,
&rightmax, &rightmin))
goto fail; /* no range available from stats */
}
* fraction that's <= the right-side maximum value. But only believe
* non-default estimates, else stick with our 1.0.
*/
- selec = scalarineqsel(root, leop, isgt, true, &leftvar,
+ selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
rightmax, op_righttype);
if (selec != DEFAULT_INEQ_SEL)
*leftend = selec;
/* And similarly for the right variable. */
- selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
+ selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
leftmax, op_lefttype);
if (selec != DEFAULT_INEQ_SEL)
*rightend = selec;
* minimum value. But only believe non-default estimates, else stick with
* our own default.
*/
- selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
+ selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
rightmin, op_righttype);
if (selec != DEFAULT_INEQ_SEL)
*leftstart = selec;
/* And similarly for the right variable. */
- selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
+ selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
leftmin, op_lefttype);
if (selec != DEFAULT_INEQ_SEL)
*rightstart = selec;
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
double selec;
/* Use generic restriction selectivity logic. */
- selec = generic_restriction_selectivity(root, operator,
+ selec = generic_restriction_selectivity(root, operator, collation,
args, varRelid,
DEFAULT_MATCHING_SEL);
*
* sortop is the "<" comparison operator to use. This should generally
* be "<" not ">", as only the former is likely to be found in pg_statistic.
+ * The collation must be specified too.
*/
static bool
-get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
+get_variable_range(PlannerInfo *root, VariableStatData *vardata,
+ Oid sortop, Oid collation,
Datum *min, Datum *max)
{
Datum tmin = 0;
* before enabling this.
*/
#ifdef NOT_USED
- if (get_actual_variable_range(root, vardata, sortop, min, max))
+ if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
return true;
#endif
*
* If there is a histogram that is sorted with some other operator than
* the one we want, fail --- this suggests that there is data we can't
- * use.
+ * use. XXX: should also consider whether the collation matches.
*/
if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_HISTOGRAM, sortop,
continue;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
- sslot.stacoll,
+ collation,
sslot.values[i], tmin)))
{
tmin = sslot.values[i];
tmin_is_mcv = true;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
- sslot.stacoll,
+ collation,
tmax, sslot.values[i])))
{
tmax = sslot.values[i];
* If no data available, return false.
*
* sortop is the "<" comparison operator to use.
+ * collation is the required collation.
*/
static bool
get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
- Oid sortop,
+ Oid sortop, Oid collation,
Datum *min, Datum *max)
{
bool have_data = false;
continue;
/*
- * The first index column must match the desired variable and sort
- * operator --- but we can use a descending-order index.
+ * The first index column must match the desired variable, sortop, and
+ * collation --- but we can use a descending-order index.
*/
+ if (collation != index->indexcollations[0])
+ continue; /* test first because it's cheapest */
if (!match_index_to_operand(vardata->var, 0, index))
continue;
switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))