Remove useless "rough estimate" path from mcelem_array_contained_selec.

author Tom Lane <tgl@sss.pgh.pa.us>

Sun, 4 Mar 2012 21:03:38 +0000 (16:03 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sun, 4 Mar 2012 21:03:38 +0000 (16:03 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 4 Mar 2012 21:03:38 +0000 (16:03 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 4 Mar 2012 21:03:38 +0000 (16:03 -0500)
diff --git a/src/backend/utils/adt/array_selfuncs.c b/src/backend/utils/adt/array_selfuncs.c

index 3916de4bfb61df8309102a862a9d91e0302bac06..bc4ebd20749997ff3441b7824118dbde320b5524 100644 (file)
--- a/src/backend/utils/adt/array_selfuncs.c
+++ b/src/backend/utils/adt/array_selfuncs.c
@@ -242,8 +242,7 @@ scalararraysel_containment(PlannerInfo *root,
  }
  
  /*
- * arraycontsel -- restriction selectivity for "arraycolumn @> const",
- * "arraycolumn && const" or "arraycolumn <@ const"
+ * arraycontsel -- restriction selectivity for array @>, &&, <@ operators
   */
  Datum
  arraycontsel(PG_FUNCTION_ARGS)
@@ -323,8 +322,7 @@ arraycontsel(PG_FUNCTION_ARGS)
  }
  
  /*
- * arraycontjoinsel -- join selectivity for "arraycolumn @> const",
- * "arraycolumn && const" or "arraycolumn <@ const"
+ * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators
   */
  Datum
  arraycontjoinsel(PG_FUNCTION_ARGS)
@@ -744,6 +742,10 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
         if (numbers == NULL || nnumbers != nmcelem + 3)
                 return DEFAULT_CONTAIN_SEL;
  
+       /* Can't do much without a count histogram, either */
+       if (hist == NULL || nhist < 3)
+               return DEFAULT_CONTAIN_SEL;
+
         /*
          * Grab some of the summary statistics that compute_array_stats() stores:
          * lowest frequency, frequency of null elements, and average distinct
@@ -751,11 +753,7 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
          */
         minfreq = numbers[nmcelem];
         nullelem_freq = numbers[nmcelem + 2];
-
-       if (hist && nhist > 0)
-               avg_count = hist[nhist - 1];
-       else
-               avg_count = 10.0f;              /* default assumption */
+       avg_count = hist[nhist - 1];
  
         /*
          * "rest" will be the sum of the frequencies of all elements not
@@ -853,83 +851,71 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
          */
         mult *= exp(-rest);
  
-       /* Check we have nonempty distinct element count histogram */
-       if (hist && nhist >= 3)
-       {
-               /*----------
-                * Using the distinct element count histogram requires
-                *              O(unique_nitems * (nmcelem + unique_nitems))
-                * operations.  Beyond a certain computational cost threshold, it's
-                * reasonable to sacrifice accuracy for decreased planning time.
-                * We limit the number of operations to EFFORT * nmcelem; since
-                * nmcelem is limited by the column's statistics target, the work
-                * done is user-controllable.
-                *
-                * If the number of operations would be too large, we can reduce it
-                * without losing all accuracy by reducing unique_nitems and
-                * considering only the most-common elements of the constant array.
-                * To make the results exactly match what we would have gotten with
-                * only those elements to start with, we'd have to remove any
-                * discarded elements' frequencies from "mult", but since this is only
-                * an approximation anyway, we don't bother with that.  Therefore it's
-                * sufficient to qsort elem_selec[] and take the largest elements.
-                * (They will no longer match up with the elements of array_data[],
-                * but we don't care.)
-                *----------
-                */
+       /*----------
+        * Using the distinct element count histogram requires
+        *              O(unique_nitems * (nmcelem + unique_nitems))
+        * operations.  Beyond a certain computational cost threshold, it's
+        * reasonable to sacrifice accuracy for decreased planning time.  We limit
+        * the number of operations to EFFORT * nmcelem; since nmcelem is limited
+        * by the column's statistics target, the work done is user-controllable.
+        *
+        * If the number of operations would be too large, we can reduce it
+        * without losing all accuracy by reducing unique_nitems and considering
+        * only the most-common elements of the constant array.  To make the
+        * results exactly match what we would have gotten with only those
+        * elements to start with, we'd have to remove any discarded elements'
+        * frequencies from "mult", but since this is only an approximation
+        * anyway, we don't bother with that.  Therefore it's sufficient to qsort
+        * elem_selec[] and take the largest elements.  (They will no longer match
+        * up with the elements of array_data[], but we don't care.)
+        *----------
+        */
  #define EFFORT 100
  
-               if ((nmcelem + unique_nitems) > 0 &&
-                       unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
-               {
-                       /*
-                        * Use the quadratic formula to solve for largest allowable N;
-                        * we have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
-                        */
-                       double  b = (double) nmcelem;
-                       int             n;
-
-                       n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2);
-
-                       /* Sort, then take just the first n elements */
-                       qsort(elem_selec, unique_nitems, sizeof(float),
-                                 float_compare_desc);
-                       unique_nitems = n;
-               }
-
+       if ((nmcelem + unique_nitems) > 0 &&
+               unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
+       {
                 /*
-                * Calculate probabilities of each distinct element count for both
-                * mcelems and constant elements.  At this point, assume independent
-                * element occurrence.
+                * Use the quadratic formula to solve for largest allowable N.  We
+                * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
                  */
-               dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
-               mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
+               double  b = (double) nmcelem;
+               int             n;
  
-               /* ignore hist[nhist-1], which is the avg not a histogram member */
-               hist_part = calc_hist(hist, nhist - 1, unique_nitems);
+               n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2);
  
-               selec = 0.0f;
-               for (i = 0; i <= unique_nitems; i++)
-               {
-                       /*
-                        * mult * dist[i] / mcelem_dist[i] gives us probability of qual
-                        * matching from assumption of independent element occurrence with
-                        * the condition that distinct element count = i.
-                        */
-                       if (mcelem_dist[i] > 0)
-                               selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
-               }
-
-               pfree(dist);
-               pfree(mcelem_dist);
-               pfree(hist_part);
+               /* Sort, then take just the first n elements */
+               qsort(elem_selec, unique_nitems, sizeof(float),
+                         float_compare_desc);
+               unique_nitems = n;
         }
-       else
+
+       /*
+        * Calculate probabilities of each distinct element count for both
+        * mcelems and constant elements.  At this point, assume independent
+        * element occurrence.
+        */
+       dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
+       mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
+
+       /* ignore hist[nhist-1], which is the average not a histogram member */
+       hist_part = calc_hist(hist, nhist - 1, unique_nitems);
+
+       selec = 0.0f;
+       for (i = 0; i <= unique_nitems; i++)
         {
-               /* We don't have histogram.  Use a rough estimate. */
-               selec = mult;
+               /*
+                * mult * dist[i] / mcelem_dist[i] gives us probability of qual
+                * matching from assumption of independent element occurrence with
+                * the condition that distinct element count = i.
+                */
+               if (mcelem_dist[i] > 0)
+                       selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
         }
  
+       pfree(dist);
+       pfree(mcelem_dist);
+       pfree(hist_part);
         pfree(elem_selec);
  
         /* Take into account occurrence of NULL element. */
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 4 Mar 2012 21:03:38 +0000 (16:03 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 4 Mar 2012 21:03:38 +0000 (16:03 -0500)