summaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
author Tom Lane 2006-09-20 19:50:21 +0000
committer Tom Lane 2006-09-20 19:50:21 +0000
commit bfd1ffa948e676f0587239a36e71f15690ef2cde (patch)
tree 0f29dd2001a93fa708142d94f62f940967b09f6b /contrib
parent 06b33f0ee8481a83f9e62c0fc77ab16afb4aeec1 (diff)
Change patternsel (LIKE/regex selectivity estimation) so that if there
is a large enough histogram, it will use the number of matches in the histogram to derive a selectivity estimate, rather than the admittedly pretty bogus heuristics involving examining the pattern contents. I set 'large enough' at 100, but perhaps we should change that later. Also apply the same technique in contrib/ltree's <@ and @> estimator. Per discussion with Stefan Kaltenbrunner and Matteo Beccati.
Diffstat (limited to 'contrib')
-rw-r--r-- contrib/ltree/ltree_op.c | 40
1 files changed, 36 insertions, 4 deletions
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index 86178d0a9e2..f4348e0161f 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -1,13 +1,14 @@
/*
* op function for ltree
* Teodor Sigaev <teodor@stack.net>
- * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.12 2006/05/30 22:12:13 tgl Exp $
+ * $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.13 2006/09/20 19:50:21 tgl Exp $
*/
#include "ltree.h"
#include <ctype.h>
+#include "catalog/pg_statistic.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
@@ -606,6 +607,7 @@ ltreeparentsel(PG_FUNCTION_ARGS)
FmgrInfo contproc;
double mcvsum;
double mcvsel;
+ double nullfrac;
fmgr_info(get_opcode(operator), &contproc);
@@ -616,10 +618,40 @@ ltreeparentsel(PG_FUNCTION_ARGS)
&mcvsum);
/*
- * We have the exact selectivity for values appearing in the MCV list;
- * use the default selectivity for the rest of the population.
+ * If the histogram is large enough, see what fraction of it the
+ * constant is "<@" to, and assume that's representative of the
+ * non-MCV population. Otherwise use the default selectivity for
+ * the non-MCV population.
*/
- selec = mcvsel + DEFAULT_PARENT_SEL * (1.0 - mcvsum);
+ selec = histogram_selectivity(&vardata, &contproc,
+ constval, varonleft,
+ 100, 1);
+ if (selec < 0)
+ {
+ /* Nope, fall back on default */
+ selec = DEFAULT_PARENT_SEL;
+ }
+ else
+ {
+ /* Yes, but don't believe extremely small or large estimates. */
+ if (selec < 0.0001)
+ selec = 0.0001;
+ else if (selec > 0.9999)
+ selec = 0.9999;
+ }
+
+ if (HeapTupleIsValid(vardata.statsTuple))
+ nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
+ else
+ nullfrac = 0.0;
+
+ /*
+ * Now merge the results from the MCV and histogram calculations,
+ * realizing that the histogram covers only the non-null values that
+ * are not listed in MCV.
+ */
+ selec *= 1.0 - nullfrac - mcvsum;
+ selec += mcvsel;
}
else
selec = DEFAULT_PARENT_SEL;