diff options
| author | Tom Lane | 2006-09-20 19:50:21 +0000 |
|---|---|---|
| committer | Tom Lane | 2006-09-20 19:50:21 +0000 |
| commit | bfd1ffa948e676f0587239a36e71f15690ef2cde (patch) | |
| tree | 0f29dd2001a93fa708142d94f62f940967b09f6b /contrib | |
| parent | 06b33f0ee8481a83f9e62c0fc77ab16afb4aeec1 (diff) | |
Change patternsel (LIKE/regex selectivity estimation) so that if there
is a large enough histogram, it will use the number of matches in the
histogram to derive a selectivity estimate, rather than the admittedly
pretty bogus heuristics involving examining the pattern contents. I set
'large enough' at 100, but perhaps we should change that later. Also
apply the same technique in contrib/ltree's <@ and @> estimator. Per
discussion with Stefan Kaltenbrunner and Matteo Beccati.
Diffstat (limited to 'contrib')
| -rw-r--r-- | contrib/ltree/ltree_op.c | 40 |
1 file changed, 36 insertions, 4 deletions
```diff
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index 86178d0a9e2..f4348e0161f 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -1,13 +1,14 @@
 /*
  * op function for ltree
  * Teodor Sigaev <teodor@stack.net>
- * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.12 2006/05/30 22:12:13 tgl Exp $
+ * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.13 2006/09/20 19:50:21 tgl Exp $
  */
 
 #include "ltree.h"
 
 #include <ctype.h>
 
+#include "catalog/pg_statistic.h"
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 #include "utils/syscache.h"
@@ -606,6 +607,7 @@ ltreeparentsel(PG_FUNCTION_ARGS)
 		FmgrInfo	contproc;
 		double		mcvsum;
 		double		mcvsel;
+		double		nullfrac;
 
 		fmgr_info(get_opcode(operator), &contproc);
 
@@ -616,10 +618,40 @@ ltreeparentsel(PG_FUNCTION_ARGS)
 									&mcvsum);
 
 		/*
-		 * We have the exact selectivity for values appearing in the MCV list;
-		 * use the default selectivity for the rest of the population.
+		 * If the histogram is large enough, see what fraction of it the
+		 * constant is "<@" to, and assume that's representative of the
+		 * non-MCV population. Otherwise use the default selectivity for
+		 * the non-MCV population.
 		 */
-		selec = mcvsel + DEFAULT_PARENT_SEL * (1.0 - mcvsum);
+		selec = histogram_selectivity(&vardata, &contproc,
+									  constval, varonleft,
+									  100, 1);
+		if (selec < 0)
+		{
+			/* Nope, fall back on default */
+			selec = DEFAULT_PARENT_SEL;
+		}
+		else
+		{
+			/* Yes, but don't believe extremely small or large estimates. */
+			if (selec < 0.0001)
+				selec = 0.0001;
+			else if (selec > 0.9999)
+				selec = 0.9999;
+		}
+
+		if (HeapTupleIsValid(vardata.statsTuple))
+			nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
+		else
+			nullfrac = 0.0;
+
+		/*
+		 * Now merge the results from the MCV and histogram calculations,
+		 * realizing that the histogram covers only the non-null values that
+		 * are not listed in MCV.
+		 */
+		selec *= 1.0 - nullfrac - mcvsum;
+		selec += mcvsel;
 	}
 	else
 		selec = DEFAULT_PARENT_SEL;
```
