Refactor merge path generation code.
authorRobert Haas <rhaas@postgresql.org>
Wed, 21 Dec 2016 14:44:33 +0000 (09:44 -0500)
committerRobert Haas <rhaas@postgresql.org>
Wed, 21 Dec 2016 14:45:50 +0000 (09:45 -0500)
This shouldn't change the set of paths that get generated in any
way, but it is preparatory work for further changes to allow a
partial path to be merge-joined with a non-partial path to produce
a partial join path.

Dilip Kumar, with cosmetic adjustments by me.

src/backend/optimizer/path/joinpath.c

index 96f00fca5bf5cc9315b18d639cd82fe35e5e120c..b5cbcf4d0494e9de14f4637155c6f5eb96539ac9 100644 (file)
@@ -50,6 +50,15 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
                         List *restrictlist,
                         JoinType jointype,
                         bool *mergejoin_allowed);
+static void generate_mergejoin_paths(PlannerInfo *root,
+                        RelOptInfo *joinrel,
+                        RelOptInfo *innerrel,
+                        Path *outerpath,
+                        JoinType jointype,
+                        JoinPathExtraData *extra,
+                        bool useallclauses,
+                        Path *inner_cheapest_total,
+                        List *merge_pathkeys);
 
 
 /*
@@ -776,6 +785,241 @@ sort_inner_and_outer(PlannerInfo *root,
    }
 }
 
+/*
+ * generate_mergejoin_paths
+ * Creates possible mergejoin paths for input outerpath.
+ *
+ * We generate mergejoins if mergejoin clauses are available.  We have
+ * two ways to generate the inner path for a mergejoin: sort the cheapest
+ * inner path, or use an inner path that is already suitably ordered for the
+ * merge.  If we have several mergeclauses, it could be that there is no inner
+ * path (or only a very expensive one) for the full list of mergeclauses, but
+ * better paths exist if we truncate the mergeclause list (thereby discarding
+ * some sort key requirements).  So, we consider truncations of the
+ * mergeclause list as well as the full list.  (Ideally we'd consider all
+ * subsets of the mergeclause list, but that seems way too expensive.)
+ *
+ * 'joinrel' is the join relation; it is handed to try_mergejoin_path
+ * 'innerrel' is the inner join relation, whose pathlist is searched for
+ *     already-sorted, unparameterized inner paths
+ * 'outerpath' is the outer path; its pathkeys determine which of the
+ *     candidate mergeclauses are usable
+ * 'jointype' is the type of join to do; JOIN_UNIQUE_OUTER and
+ *     JOIN_UNIQUE_INNER are treated as JOIN_INNER when building paths, and
+ *     for JOIN_UNIQUE_INNER only the sort-the-cheapest-inner path is tried
+ * 'extra' supplies the candidate clauses (extra->mergeclause_list) and is
+ *     passed through to try_mergejoin_path
+ * 'useallclauses': if true, give up unless as many mergeclauses are found
+ *     for this outer path as there are in extra->mergeclause_list, and do
+ *     not consider truncated sort key lists
+ * 'inner_cheapest_total' is the inner path used for the explicit-sort
+ *     mergejoin; it is dereferenced unconditionally here, so the caller
+ *     must not pass NULL
+ * 'merge_pathkeys' is passed to try_mergejoin_path unchanged (presumably
+ *     the output ordering of the join; confirm against the caller)
+ *
+ * Nothing is returned; any paths generated are submitted through
+ * try_mergejoin_path.
+ */
+static void
+generate_mergejoin_paths(PlannerInfo *root,
+                        RelOptInfo *joinrel,
+                        RelOptInfo *innerrel,
+                        Path *outerpath,
+                        JoinType jointype,
+                        JoinPathExtraData *extra,
+                        bool useallclauses,
+                        Path *inner_cheapest_total,
+                        List *merge_pathkeys)
+{
+   List       *mergeclauses;
+   List       *innersortkeys;
+   List       *trialsortkeys;
+   Path       *cheapest_startup_inner;
+   Path       *cheapest_total_inner;
+   JoinType    save_jointype = jointype;
+   int         num_sortkeys;
+   int         sortkeycnt;
+
+   if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+       jointype = JOIN_INNER;
+
+   /* Look for useful mergeclauses (if any) */
+   mergeclauses = find_mergeclauses_for_pathkeys(root,
+                                                 outerpath->pathkeys,
+                                                 true,
+                                                 extra->mergeclause_list);
+
+   /*
+    * Done with this outer path if no chance for a mergejoin.
+    *
+    * Special corner case: for "x FULL JOIN y ON true", there will be no join
+    * clauses at all.  Ordinarily we'd generate a clauseless nestloop path,
+    * but since mergejoin is our only join type that supports FULL JOIN
+    * without any join clauses, it's necessary to generate a clauseless
+    * mergejoin path instead.
+    */
+   if (mergeclauses == NIL)
+   {
+       if (jointype == JOIN_FULL)
+            /* okay to try for mergejoin */ ;
+       else
+           return;
+   }
+   if (useallclauses &&
+       list_length(mergeclauses) != list_length(extra->mergeclause_list))
+       return;
+
+   /* Compute the required ordering of the inner path */
+   innersortkeys = make_inner_pathkeys_for_merge(root,
+                                                 mergeclauses,
+                                                 outerpath->pathkeys);
+
+   /*
+    * Generate a mergejoin on the basis of sorting the cheapest inner. Since
+    * a sort will be needed, only cheapest total cost matters. (But
+    * try_mergejoin_path will do the right thing if inner_cheapest_total is
+    * already correctly sorted.)
+    */
+   try_mergejoin_path(root,
+                      joinrel,
+                      outerpath,
+                      inner_cheapest_total,
+                      merge_pathkeys,
+                      mergeclauses,
+                      NIL,
+                      innersortkeys,
+                      jointype,
+                      extra);
+
+   /*
+    * Can't do anything else if inner path needs to be unique'd.  Note that
+    * this tests the caller's original jointype, since the local copy was
+    * switched to JOIN_INNER above.
+    */
+   if (save_jointype == JOIN_UNIQUE_INNER)
+       return;
+
+   /*
+    * Look for presorted inner paths that satisfy the innersortkey list ---
+    * or any truncation thereof, if we are allowed to build a mergejoin using
+    * a subset of the merge clauses.  Here, we consider both cheap startup
+    * cost and cheap total cost.
+    *
+    * Currently we do not consider parameterized inner paths here. This
+    * interacts with decisions elsewhere that also discriminate against
+    * mergejoins with parameterized inputs; see comments in
+    * src/backend/optimizer/README.
+    *
+    * As we shorten the sortkey list, we should consider only paths that are
+    * strictly cheaper than (in particular, not the same as) any path found
+    * in an earlier iteration.  Otherwise we'd be intentionally using fewer
+    * merge keys than a given path allows (treating the rest as plain
+    * joinquals), which is unlikely to be a good idea.  Also, eliminating
+    * paths here on the basis of compare_path_costs is a lot cheaper than
+    * building the mergejoin path only to throw it away.
+    *
+    * If inner_cheapest_total is well enough sorted to have not required a
+    * sort in the path made above, we shouldn't make a duplicate path with
+    * it, either.  We handle that case with the same logic that handles the
+    * previous consideration, by initializing the variables that track
+    * cheapest-so-far properly.  Note that we do NOT reject
+    * inner_cheapest_total if we find it matches some shorter set of
+    * pathkeys.  That case corresponds to using fewer mergekeys to avoid
+    * sorting inner_cheapest_total, whereas we did sort it above, so the
+    * plans being considered are different.
+    */
+   if (pathkeys_contained_in(innersortkeys,
+                             inner_cheapest_total->pathkeys))
+   {
+       /* inner_cheapest_total didn't require a sort */
+       cheapest_startup_inner = inner_cheapest_total;
+       cheapest_total_inner = inner_cheapest_total;
+   }
+   else
+   {
+       /* it did require a sort, at least for the full set of keys */
+       cheapest_startup_inner = NULL;
+       cheapest_total_inner = NULL;
+   }
+   num_sortkeys = list_length(innersortkeys);
+   if (num_sortkeys > 1 && !useallclauses)
+       trialsortkeys = list_copy(innersortkeys);       /* need modifiable copy */
+   else
+       trialsortkeys = innersortkeys;  /* won't really truncate */
+
+   for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
+   {
+       Path       *innerpath;
+       List       *newclauses = NIL;
+
+       /*
+        * Look for an inner path ordered well enough for the first
+        * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
+        * destructively, which is why we made a copy...
+        */
+       trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
+       innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+                                                  trialsortkeys,
+                                                  NULL,
+                                                  TOTAL_COST);
+       if (innerpath != NULL &&
+           (cheapest_total_inner == NULL ||
+            compare_path_costs(innerpath, cheapest_total_inner,
+                               TOTAL_COST) < 0))
+       {
+           /* Found a cheap (or even-cheaper) sorted path */
+           /* Select the right mergeclauses, if we didn't already */
+           if (sortkeycnt < num_sortkeys)
+           {
+               newclauses =
+                   find_mergeclauses_for_pathkeys(root,
+                                                  trialsortkeys,
+                                                  false,
+                                                  mergeclauses);
+               Assert(newclauses != NIL);
+           }
+           else
+               newclauses = mergeclauses;
+           try_mergejoin_path(root,
+                              joinrel,
+                              outerpath,
+                              innerpath,
+                              merge_pathkeys,
+                              newclauses,
+                              NIL,
+                              NIL,
+                              jointype,
+                              extra);
+           cheapest_total_inner = innerpath;
+       }
+       /* Same on the basis of cheapest startup cost ... */
+       innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+                                                  trialsortkeys,
+                                                  NULL,
+                                                  STARTUP_COST);
+       if (innerpath != NULL &&
+           (cheapest_startup_inner == NULL ||
+            compare_path_costs(innerpath, cheapest_startup_inner,
+                               STARTUP_COST) < 0))
+       {
+           /*
+            * Found a cheap (or even-cheaper) sorted path.  Skip building a
+            * join path if it is the very path just used as the cheapest
+            * total-cost inner, but still record it as cheapest startup.
+            */
+           if (innerpath != cheapest_total_inner)
+           {
+               /*
+                * Avoid rebuilding clause list if we already made one; saves
+                * memory in big join trees...
+                */
+               if (newclauses == NIL)
+               {
+                   if (sortkeycnt < num_sortkeys)
+                   {
+                       newclauses =
+                           find_mergeclauses_for_pathkeys(root,
+                                                          trialsortkeys,
+                                                          false,
+                                                          mergeclauses);
+                       Assert(newclauses != NIL);
+                   }
+                   else
+                       newclauses = mergeclauses;
+               }
+               try_mergejoin_path(root,
+                                  joinrel,
+                                  outerpath,
+                                  innerpath,
+                                  merge_pathkeys,
+                                  newclauses,
+                                  NIL,
+                                  NIL,
+                                  jointype,
+                                  extra);
+           }
+           cheapest_startup_inner = innerpath;
+       }
+
+       /*
+        * Don't consider truncated sortkeys if we need all clauses.
+        */
+       if (useallclauses)
+           break;
+   }
+}
+
 /*
  * match_unsorted_outer
  *   Creates possible join paths for processing a single join relation
@@ -790,15 +1034,8 @@ sort_inner_and_outer(PlannerInfo *root,
  * cheapest-total inner-indexscan path (if any), and one on the
  * cheapest-startup inner-indexscan path (if different).
  *
- * We also consider mergejoins if mergejoin clauses are available.  We have
- * two ways to generate the inner path for a mergejoin: sort the cheapest
- * inner path, or use an inner path that is already suitably ordered for the
- * merge.  If we have several mergeclauses, it could be that there is no inner
- * path (or only a very expensive one) for the full list of mergeclauses, but
- * better paths exist if we truncate the mergeclause list (thereby discarding
- * some sort key requirements).  So, we consider truncations of the
- * mergeclause list as well as the full list.  (Ideally we'd consider all
- * subsets of the mergeclause list, but that seems way too expensive.)
+ * We also consider mergejoins if mergejoin clauses are available.  See
+ * detailed comments in generate_mergejoin_paths.
  *
  * 'joinrel' is the join relation
  * 'outerrel' is the outer join relation
@@ -894,13 +1131,6 @@ match_unsorted_outer(PlannerInfo *root,
    {
        Path       *outerpath = (Path *) lfirst(lc1);
        List       *merge_pathkeys;
-       List       *mergeclauses;
-       List       *innersortkeys;
-       List       *trialsortkeys;
-       Path       *cheapest_startup_inner;
-       Path       *cheapest_total_inner;
-       int         num_sortkeys;
-       int         sortkeycnt;
 
        /*
         * We cannot use an outer path that is parameterized by the inner rel.
@@ -986,201 +1216,10 @@ match_unsorted_outer(PlannerInfo *root,
        if (inner_cheapest_total == NULL)
            continue;
 
-       /* Look for useful mergeclauses (if any) */
-       mergeclauses = find_mergeclauses_for_pathkeys(root,
-                                                     outerpath->pathkeys,
-                                                     true,
-                                                   extra->mergeclause_list);
-
-       /*
-        * Done with this outer path if no chance for a mergejoin.
-        *
-        * Special corner case: for "x FULL JOIN y ON true", there will be no
-        * join clauses at all.  Ordinarily we'd generate a clauseless
-        * nestloop path, but since mergejoin is our only join type that
-        * supports FULL JOIN without any join clauses, it's necessary to
-        * generate a clauseless mergejoin path instead.
-        */
-       if (mergeclauses == NIL)
-       {
-           if (jointype == JOIN_FULL)
-                /* okay to try for mergejoin */ ;
-           else
-               continue;
-       }
-       if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
-           continue;
-
-       /* Compute the required ordering of the inner path */
-       innersortkeys = make_inner_pathkeys_for_merge(root,
-                                                     mergeclauses,
-                                                     outerpath->pathkeys);
-
-       /*
-        * Generate a mergejoin on the basis of sorting the cheapest inner.
-        * Since a sort will be needed, only cheapest total cost matters. (But
-        * try_mergejoin_path will do the right thing if inner_cheapest_total
-        * is already correctly sorted.)
-        */
-       try_mergejoin_path(root,
-                          joinrel,
-                          outerpath,
-                          inner_cheapest_total,
-                          merge_pathkeys,
-                          mergeclauses,
-                          NIL,
-                          innersortkeys,
-                          jointype,
-                          extra);
-
-       /* Can't do anything else if inner path needs to be unique'd */
-       if (save_jointype == JOIN_UNIQUE_INNER)
-           continue;
-
-       /*
-        * Look for presorted inner paths that satisfy the innersortkey list
-        * --- or any truncation thereof, if we are allowed to build a
-        * mergejoin using a subset of the merge clauses.  Here, we consider
-        * both cheap startup cost and cheap total cost.
-        *
-        * Currently we do not consider parameterized inner paths here. This
-        * interacts with decisions elsewhere that also discriminate against
-        * mergejoins with parameterized inputs; see comments in
-        * src/backend/optimizer/README.
-        *
-        * As we shorten the sortkey list, we should consider only paths that
-        * are strictly cheaper than (in particular, not the same as) any path
-        * found in an earlier iteration.  Otherwise we'd be intentionally
-        * using fewer merge keys than a given path allows (treating the rest
-        * as plain joinquals), which is unlikely to be a good idea.  Also,
-        * eliminating paths here on the basis of compare_path_costs is a lot
-        * cheaper than building the mergejoin path only to throw it away.
-        *
-        * If inner_cheapest_total is well enough sorted to have not required
-        * a sort in the path made above, we shouldn't make a duplicate path
-        * with it, either.  We handle that case with the same logic that
-        * handles the previous consideration, by initializing the variables
-        * that track cheapest-so-far properly.  Note that we do NOT reject
-        * inner_cheapest_total if we find it matches some shorter set of
-        * pathkeys.  That case corresponds to using fewer mergekeys to avoid
-        * sorting inner_cheapest_total, whereas we did sort it above, so the
-        * plans being considered are different.
-        */
-       if (pathkeys_contained_in(innersortkeys,
-                                 inner_cheapest_total->pathkeys))
-       {
-           /* inner_cheapest_total didn't require a sort */
-           cheapest_startup_inner = inner_cheapest_total;
-           cheapest_total_inner = inner_cheapest_total;
-       }
-       else
-       {
-           /* it did require a sort, at least for the full set of keys */
-           cheapest_startup_inner = NULL;
-           cheapest_total_inner = NULL;
-       }
-       num_sortkeys = list_length(innersortkeys);
-       if (num_sortkeys > 1 && !useallclauses)
-           trialsortkeys = list_copy(innersortkeys);   /* need modifiable copy */
-       else
-           trialsortkeys = innersortkeys;      /* won't really truncate */
-
-       for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
-       {
-           Path       *innerpath;
-           List       *newclauses = NIL;
-
-           /*
-            * Look for an inner path ordered well enough for the first
-            * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
-            * destructively, which is why we made a copy...
-            */
-           trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
-           innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
-                                                      trialsortkeys,
-                                                      NULL,
-                                                      TOTAL_COST);
-           if (innerpath != NULL &&
-               (cheapest_total_inner == NULL ||
-                compare_path_costs(innerpath, cheapest_total_inner,
-                                   TOTAL_COST) < 0))
-           {
-               /* Found a cheap (or even-cheaper) sorted path */
-               /* Select the right mergeclauses, if we didn't already */
-               if (sortkeycnt < num_sortkeys)
-               {
-                   newclauses =
-                       find_mergeclauses_for_pathkeys(root,
-                                                      trialsortkeys,
-                                                      false,
-                                                      mergeclauses);
-                   Assert(newclauses != NIL);
-               }
-               else
-                   newclauses = mergeclauses;
-               try_mergejoin_path(root,
-                                  joinrel,
-                                  outerpath,
-                                  innerpath,
-                                  merge_pathkeys,
-                                  newclauses,
-                                  NIL,
-                                  NIL,
-                                  jointype,
-                                  extra);
-               cheapest_total_inner = innerpath;
-           }
-           /* Same on the basis of cheapest startup cost ... */
-           innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
-                                                      trialsortkeys,
-                                                      NULL,
-                                                      STARTUP_COST);
-           if (innerpath != NULL &&
-               (cheapest_startup_inner == NULL ||
-                compare_path_costs(innerpath, cheapest_startup_inner,
-                                   STARTUP_COST) < 0))
-           {
-               /* Found a cheap (or even-cheaper) sorted path */
-               if (innerpath != cheapest_total_inner)
-               {
-                   /*
-                    * Avoid rebuilding clause list if we already made one;
-                    * saves memory in big join trees...
-                    */
-                   if (newclauses == NIL)
-                   {
-                       if (sortkeycnt < num_sortkeys)
-                       {
-                           newclauses =
-                               find_mergeclauses_for_pathkeys(root,
-                                                              trialsortkeys,
-                                                              false,
-                                                              mergeclauses);
-                           Assert(newclauses != NIL);
-                       }
-                       else
-                           newclauses = mergeclauses;
-                   }
-                   try_mergejoin_path(root,
-                                      joinrel,
-                                      outerpath,
-                                      innerpath,
-                                      merge_pathkeys,
-                                      newclauses,
-                                      NIL,
-                                      NIL,
-                                      jointype,
-                                      extra);
-               }
-               cheapest_startup_inner = innerpath;
-           }
-
-           /*
-            * Don't consider truncated sortkeys if we need all clauses.
-            */
-           if (useallclauses)
-               break;
-       }
+       /* Generate merge join paths */
+       generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
+                                save_jointype, extra, useallclauses,
+                                inner_cheapest_total, merge_pathkeys);
    }
 
    /*