summaryrefslogtreecommitdiff
path: root/contrib/ltree
diff options
context:
space:
mode:
authorTom Lane2020-04-01 14:32:33 +0000
committerTom Lane2020-04-01 14:32:33 +0000
commita80818605e5447b9b846590c3d3fab99060cb53e (patch)
tree87b0877f0893bcc5b6a11455d6edc4a7f7644e01 /contrib/ltree
parentd8653f468789a75627c2fc82e73e2755ad8d1fb4 (diff)
Improve selectivity estimation for assorted match-style operators.
Quite a few matching operators such as JSONB's @> used "contsel" and "contjoinsel" as their selectivity estimators. That was a bad idea, because (a) contsel is only a stub, yielding a fixed default estimate, and (b) that default is 0.001, meaning we estimate these operators as five times more selective than equality, which is surely pretty silly. There's a good model for improving this in ltree's ltreeparentsel(): for any "var OP constant" query, we can try applying the operator to all of the column's MCV and histogram values, taking the latter as being a random sample of the non-MCV values. That code is actually 100% generic, except for the question of exactly what default selectivity ought to be plugged in when we don't have stats. Hence, migrate the guts of ltreeparentsel() into the core code, provide wrappers "matchingsel" and "matchingjoinsel" with a more-appropriate default estimate, and use those for the non-geometric operators that formerly used contsel (mostly JSONB containment operators and tsquery matching). Also apply this code to some match-like operators in hstore, ltree, and pg_trgm, including the former users of ltreeparentsel as well as ones that improperly used contsel. Since commit 911e70207 just created new versions of those extensions that we haven't released yet, we can sneak this change into those new versions instead of having to create an additional generation of update scripts. Patch by me, reviewed by Alexey Bashtanov Discussion: https://postgr.es/m/12237.1582833074@sss.pgh.pa.us
Diffstat (limited to 'contrib/ltree')
-rw-r--r--contrib/ltree/ltree--1.1--1.2.sql81
-rw-r--r--contrib/ltree/ltree_op.c102
2 files changed, 88 insertions, 95 deletions
diff --git a/contrib/ltree/ltree--1.1--1.2.sql b/contrib/ltree/ltree--1.1--1.2.sql
index 7b4ea99867..186381e61d 100644
--- a/contrib/ltree/ltree--1.1--1.2.sql
+++ b/contrib/ltree/ltree--1.1--1.2.sql
@@ -19,3 +19,84 @@ ADD FUNCTION 10 (ltree) ltree_gist_options (internal);
ALTER OPERATOR FAMILY gist__ltree_ops USING gist
ADD FUNCTION 10 (_ltree) _ltree_gist_options (internal);
+ALTER OPERATOR < (ltree, ltree)
+ SET (RESTRICT = scalarltsel, JOIN = scalarltjoinsel);
+ALTER OPERATOR <= (ltree, ltree)
+ SET (RESTRICT = scalarlesel, JOIN = scalarlejoinsel);
+ALTER OPERATOR >= (ltree, ltree)
+ SET (RESTRICT = scalargesel, JOIN = scalargejoinsel);
+ALTER OPERATOR > (ltree, ltree)
+ SET (RESTRICT = scalargtsel, JOIN = scalargtjoinsel);
+
+ALTER OPERATOR @> (ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@> (ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR <@ (ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^<@ (ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ~ (ltree, lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ~ (lquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^~ (ltree, lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^~ (lquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ? (ltree, _lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ? (_lquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^? (ltree, _lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^? (_lquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @ (ltree, ltxtquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @ (ltxtquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@ (ltree, ltxtquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@ (ltxtquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @> (_ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR <@ (ltree, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR <@ (_ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @> (ltree, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ~ (_ltree, lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ~ (lquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ? (_ltree, _lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ? (_lquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @ (_ltree, ltxtquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @ (ltxtquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@> (_ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^<@ (ltree, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^<@ (_ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@> (ltree, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^~ (_ltree, lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^~ (lquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^? (_ltree, _lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^? (_lquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@ (_ltree, ltxtquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@ (ltxtquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index 34e6e4b2aa..4ac2ed5e54 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -566,10 +566,11 @@ ltree2text(PG_FUNCTION_ARGS)
}
-#define DEFAULT_PARENT_SEL 0.001
-
/*
* ltreeparentsel - Selectivity of parent relationship for ltree data types.
+ *
+ * This function is not used anymore, if the ltree extension has been
+ * updated to 1.2 or later.
*/
Datum
ltreeparentsel(PG_FUNCTION_ARGS)
@@ -578,101 +579,12 @@ ltreeparentsel(PG_FUNCTION_ARGS)
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
- VariableStatData vardata;
- Node *other;
- bool varonleft;
double selec;
- /*
- * If expression is not variable <@ something or something <@ variable,
- * then punt and return a default estimate.
- */
- if (!get_restriction_variable(root, args, varRelid,
- &vardata, &other, &varonleft))
- PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);
-
- /*
- * If the something is a NULL constant, assume operator is strict and
- * return zero, ie, operator will never return TRUE.
- */
- if (IsA(other, Const) &&
- ((Const *) other)->constisnull)
- {
- ReleaseVariableStats(vardata);
- PG_RETURN_FLOAT8(0.0);
- }
-
- if (IsA(other, Const))
- {
- /* Variable is being compared to a known non-null constant */
- Datum constval = ((Const *) other)->constvalue;
- FmgrInfo contproc;
- double mcvsum;
- double mcvsel;
- double nullfrac;
- int hist_size;
-
- fmgr_info(get_opcode(operator), &contproc);
-
- /*
- * Is the constant "<@" to any of the column's most common values?
- */
- mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft,
- &mcvsum);
-
- /*
- * If the histogram is large enough, see what fraction of it the
- * constant is "<@" to, and assume that's representative of the
- * non-MCV population. Otherwise use the default selectivity for the
- * non-MCV population.
- */
- selec = histogram_selectivity(&vardata, &contproc,
- constval, varonleft,
- 10, 1, &hist_size);
- if (selec < 0)
- {
- /* Nope, fall back on default */
- selec = DEFAULT_PARENT_SEL;
- }
- else if (hist_size < 100)
- {
- /*
- * For histogram sizes from 10 to 100, we combine the histogram
- * and default selectivities, putting increasingly more trust in
- * the histogram for larger sizes.
- */
- double hist_weight = hist_size / 100.0;
-
- selec = selec * hist_weight +
- DEFAULT_PARENT_SEL * (1.0 - hist_weight);
- }
-
- /* In any case, don't believe extremely small or large estimates. */
- if (selec < 0.0001)
- selec = 0.0001;
- else if (selec > 0.9999)
- selec = 0.9999;
-
- if (HeapTupleIsValid(vardata.statsTuple))
- nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
- else
- nullfrac = 0.0;
-
- /*
- * Now merge the results from the MCV and histogram calculations,
- * realizing that the histogram covers only the non-null values that
- * are not listed in MCV.
- */
- selec *= 1.0 - nullfrac - mcvsum;
- selec += mcvsel;
- }
- else
- selec = DEFAULT_PARENT_SEL;
-
- ReleaseVariableStats(vardata);
-
- /* result should be in range, but make sure... */
- CLAMP_PROBABILITY(selec);
+ /* Use generic restriction selectivity logic, with default 0.001. */
+ selec = generic_restriction_selectivity(root, operator,
+ args, varRelid,
+ 0.001);
PG_RETURN_FLOAT8((float8) selec);
}