Fix LookupTupleHashEntryHash() pipeline-stall issue.
authorJeff Davis <jdavis@postgresql.org>
Sun, 26 Jul 2020 21:55:52 +0000 (14:55 -0700)
committerJeff Davis <jdavis@postgresql.org>
Sun, 26 Jul 2020 22:09:46 +0000 (15:09 -0700)
Refactor hash lookups in nodeAgg.c to improve performance.

Author: Andres Freund and Jeff Davis
Discussion: https://postgr.es/m/20200612213715.op4ye4q7gktqvpuo%40alap3.anarazel.de
Backpatch-through: 13

src/backend/executor/execGrouping.c
src/backend/executor/nodeAgg.c
src/backend/executor/nodeRecursiveunion.c
src/backend/executor/nodeSetOp.c
src/backend/executor/nodeSubplan.c
src/include/executor/executor.h

index 019b87df21ecb67130cfac76ada78ad738550e1f..321f427e478fcf3748f910b4655882c8302080c9 100644 (file)
 #include "utils/memutils.h"
 
 static int     TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
-static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
-                                                                                 const MinimalTuple tuple);
-static TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
-                                                                                                       TupleTableSlot *slot,
-                                                                                                       bool *isnew, uint32 hash);
+static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
+                                                                                                const MinimalTuple tuple);
+static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable,
+                                                                                                                  TupleTableSlot *slot,
+                                                                                                                  bool *isnew, uint32 hash);
 
 /*
  * Define parameters for tuple hash table code generation. The interface is
@@ -291,6 +291,9 @@ ResetTupleHashTable(TupleHashTable hashtable)
  * If isnew is NULL, we do not create new entries; we return NULL if no
  * match is found.
  *
+ * If hash is not NULL, we set it to the calculated hash value. This allows
+ * callers access to the hash value even if no entry is returned.
+ *
  * If isnew isn't NULL, then a new entry is created if no existing entry
  * matches.  On return, *isnew is true if the entry is newly created,
  * false if it existed already.  ->additional_data in the new entry has
@@ -298,11 +301,11 @@ ResetTupleHashTable(TupleHashTable hashtable)
  */
 TupleHashEntry
 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
-                                        bool *isnew)
+                                        bool *isnew, uint32 *hash)
 {
        TupleHashEntry entry;
        MemoryContext oldContext;
-       uint32          hash;
+       uint32          local_hash;
 
        /* Need to run the hash functions in short-lived context */
        oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
@@ -312,8 +315,13 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
        hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
        hashtable->cur_eq_func = hashtable->tab_eq_func;
 
-       hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
-       entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
+       local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
+       entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
+
+       if (hash != NULL)
+               *hash = local_hash;
+
+       Assert(entry == NULL || entry->hash == local_hash);
 
        MemoryContextSwitchTo(oldContext);
 
@@ -362,6 +370,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
        hashtable->cur_eq_func = hashtable->tab_eq_func;
 
        entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
+       Assert(entry == NULL || entry->hash == hash);
 
        MemoryContextSwitchTo(oldContext);
 
@@ -480,7 +489,7 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb,
  * NB: This function may or may not change the memory context. Caller is
  * expected to change it back.
  */
-static TupleHashEntry
+static inline TupleHashEntry
 LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
                                                          bool *isnew, uint32 hash)
 {
index b79c845a6b739d606c63d1bc9b24a39c8155949f..bbfc4af1ec9cb1c581a1a49da4eeee6b24d662f7 100644 (file)
@@ -391,7 +391,9 @@ static void finalize_partialaggregate(AggState *aggstate,
                                                                          AggStatePerAgg peragg,
                                                                          AggStatePerGroup pergroupstate,
                                                                          Datum *resultVal, bool *resultIsNull);
-static void prepare_hash_slot(AggState *aggstate);
+static inline void prepare_hash_slot(AggStatePerHash perhash,
+                                                                        TupleTableSlot *inputslot,
+                                                                        TupleTableSlot *hashslot);
 static void prepare_projection_slot(AggState *aggstate,
                                                                        TupleTableSlot *slot,
                                                                        int currentSet);
@@ -413,8 +415,9 @@ static int  hash_choose_num_partitions(uint64 input_groups,
                                                                           double hashentrysize,
                                                                           int used_bits,
                                                                           int *log2_npartittions);
-static AggStatePerGroup lookup_hash_entry(AggState *aggstate, uint32 hash,
-                                                                                 bool *in_hash_table);
+static void initialize_hash_entry(AggState *aggstate,
+                                                                 TupleHashTable hashtable,
+                                                                 TupleHashEntry entry);
 static void lookup_hash_entries(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
@@ -1207,12 +1210,11 @@ finalize_partialaggregate(AggState *aggstate,
  * Extract the attributes that make up the grouping key into the
  * hashslot. This is necessary to compute the hash or perform a lookup.
  */
-static void
-prepare_hash_slot(AggState *aggstate)
+static inline void
+prepare_hash_slot(AggStatePerHash perhash,
+                                 TupleTableSlot *inputslot,
+                                 TupleTableSlot *hashslot)
 {
-       TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
-       AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
-       TupleTableSlot *hashslot = perhash->hashslot;
        int                     i;
 
        /* transfer just the needed columns into hashslot */
@@ -2013,75 +2015,39 @@ hash_choose_num_partitions(uint64 input_groups, double hashentrysize,
 }
 
 /*
- * Find or create a hashtable entry for the tuple group containing the current
- * tuple (already set in tmpcontext's outertuple slot), in the current grouping
- * set (which the caller must have selected - note that initialize_aggregate
- * depends on this).
- *
- * When called, CurrentMemoryContext should be the per-query context. The
- * already-calculated hash value for the tuple must be specified.
- *
- * If in "spill mode", then only find existing hashtable entries; don't create
- * new ones. If a tuple's group is not already present in the hash table for
- * the current grouping set, assign *in_hash_table=false and the caller will
- * spill it to disk.
+ * Initialize a freshly-created TupleHashEntry.
  */
-static AggStatePerGroup
-lookup_hash_entry(AggState *aggstate, uint32 hash, bool *in_hash_table)
+static void
+initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
+                                         TupleHashEntry entry)
 {
-       AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
-       TupleTableSlot *hashslot = perhash->hashslot;
-       TupleHashEntryData *entry;
-       bool            isnew = false;
-       bool       *p_isnew;
-
-       /* if hash table already spilled, don't create new entries */
-       p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
-
-       /* find or create the hashtable entry using the filtered tuple */
-       entry = LookupTupleHashEntryHash(perhash->hashtable, hashslot, p_isnew,
-                                                                        hash);
-
-       if (entry == NULL)
-       {
-               *in_hash_table = false;
-               return NULL;
-       }
-       else
-               *in_hash_table = true;
-
-       if (isnew)
-       {
-               AggStatePerGroup pergroup;
-               int                     transno;
+       AggStatePerGroup pergroup;
+       int                     transno;
 
-               aggstate->hash_ngroups_current++;
-               hash_agg_check_limits(aggstate);
+       aggstate->hash_ngroups_current++;
+       hash_agg_check_limits(aggstate);
 
-               /* no need to allocate or initialize per-group state */
-               if (aggstate->numtrans == 0)
-                       return NULL;
+       /* no need to allocate or initialize per-group state */
+       if (aggstate->numtrans == 0)
+               return;
 
-               pergroup = (AggStatePerGroup)
-                       MemoryContextAlloc(perhash->hashtable->tablecxt,
-                                                          sizeof(AggStatePerGroupData) * aggstate->numtrans);
+       pergroup = (AggStatePerGroup)
+               MemoryContextAlloc(hashtable->tablecxt,
+                                                  sizeof(AggStatePerGroupData) * aggstate->numtrans);
 
-               entry->additional = pergroup;
+       entry->additional = pergroup;
 
-               /*
-                * Initialize aggregates for new tuple group, lookup_hash_entries()
-                * already has selected the relevant grouping set.
-                */
-               for (transno = 0; transno < aggstate->numtrans; transno++)
-               {
-                       AggStatePerTrans pertrans = &aggstate->pertrans[transno];
-                       AggStatePerGroup pergroupstate = &pergroup[transno];
+       /*
+        * Initialize aggregates for new tuple group, lookup_hash_entries()
+        * already has selected the relevant grouping set.
+        */
+       for (transno = 0; transno < aggstate->numtrans; transno++)
+       {
+               AggStatePerTrans pertrans = &aggstate->pertrans[transno];
+               AggStatePerGroup pergroupstate = &pergroup[transno];
 
-                       initialize_aggregate(aggstate, pertrans, pergroupstate);
-               }
+               initialize_aggregate(aggstate, pertrans, pergroupstate);
        }
-
-       return entry->additional;
 }
 
 /*
@@ -2106,21 +2072,37 @@ static void
 lookup_hash_entries(AggState *aggstate)
 {
        AggStatePerGroup *pergroup = aggstate->hash_pergroup;
+       TupleTableSlot *outerslot = aggstate->tmpcontext->ecxt_outertuple;
        int                     setno;
 
        for (setno = 0; setno < aggstate->num_hashes; setno++)
        {
                AggStatePerHash perhash = &aggstate->perhash[setno];
+               TupleHashTable hashtable = perhash->hashtable;
+               TupleTableSlot *hashslot = perhash->hashslot;
+               TupleHashEntry entry;
                uint32          hash;
-               bool            in_hash_table;
+               bool            isnew = false;
+               bool       *p_isnew;
+
+               /* if hash table already spilled, don't create new entries */
+               p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
 
                select_current_set(aggstate, setno, true);
-               prepare_hash_slot(aggstate);
-               hash = TupleHashTableHash(perhash->hashtable, perhash->hashslot);
-               pergroup[setno] = lookup_hash_entry(aggstate, hash, &in_hash_table);
+               prepare_hash_slot(perhash,
+                                                 outerslot,
+                                                 hashslot);
+
+               entry = LookupTupleHashEntry(hashtable, hashslot,
+                                                                        p_isnew, &hash);
 
-               /* check to see if we need to spill the tuple for this grouping set */
-               if (!in_hash_table)
+               if (entry != NULL)
+               {
+                       if (isnew)
+                               initialize_hash_entry(aggstate, hashtable, entry);
+                       pergroup[setno] = entry->additional;
+               }
+               else
                {
                        HashAggSpill *spill = &aggstate->hash_spills[setno];
                        TupleTableSlot *slot = aggstate->tmpcontext->ecxt_outertuple;
@@ -2131,6 +2113,7 @@ lookup_hash_entries(AggState *aggstate)
                                                                   aggstate->hashentrysize);
 
                        hashagg_spill_tuple(aggstate, spill, slot, hash);
+                       pergroup[setno] = NULL;
                }
        }
 }
@@ -2588,6 +2571,7 @@ static bool
 agg_refill_hash_table(AggState *aggstate)
 {
        HashAggBatch *batch;
+       AggStatePerHash perhash;
        HashAggSpill spill;
        HashTapeInfo *tapeinfo = aggstate->hash_tapeinfo;
        uint64          ngroups_estimate;
@@ -2639,6 +2623,8 @@ agg_refill_hash_table(AggState *aggstate)
 
        select_current_set(aggstate, batch->setno, true);
 
+       perhash = &aggstate->perhash[aggstate->current_set];
+
        /*
         * Spilled tuples are always read back as MinimalTuples, which may be
         * different from the outer plan, so recompile the aggregate expressions.
@@ -2652,10 +2638,13 @@ agg_refill_hash_table(AggState *aggstate)
                                                         HASHAGG_READ_BUFFER_SIZE);
        for (;;)
        {
-               TupleTableSlot *slot = aggstate->hash_spill_rslot;
+               TupleTableSlot *spillslot = aggstate->hash_spill_rslot;
+               TupleTableSlot *hashslot = perhash->hashslot;
+               TupleHashEntry entry;
                MinimalTuple tuple;
                uint32          hash;
-               bool            in_hash_table;
+               bool            isnew = false;
+               bool       *p_isnew = aggstate->hash_spill_mode ? NULL : &isnew;
 
                CHECK_FOR_INTERRUPTS();
 
@@ -2663,16 +2652,20 @@ agg_refill_hash_table(AggState *aggstate)
                if (tuple == NULL)
                        break;
 
-               ExecStoreMinimalTuple(tuple, slot, true);
-               aggstate->tmpcontext->ecxt_outertuple = slot;
+               ExecStoreMinimalTuple(tuple, spillslot, true);
+               aggstate->tmpcontext->ecxt_outertuple = spillslot;
 
-               prepare_hash_slot(aggstate);
-               aggstate->hash_pergroup[batch->setno] =
-                       lookup_hash_entry(aggstate, hash, &in_hash_table);
+               prepare_hash_slot(perhash,
+                                                 aggstate->tmpcontext->ecxt_outertuple,
+                                                 hashslot);
+               entry = LookupTupleHashEntryHash(
+                                                                                perhash->hashtable, hashslot, p_isnew, hash);
 
-               if (in_hash_table)
+               if (entry != NULL)
                {
-                       /* Advance the aggregates (or combine functions) */
+                       if (isnew)
+                               initialize_hash_entry(aggstate, perhash->hashtable, entry);
+                       aggstate->hash_pergroup[batch->setno] = entry->additional;
                        advance_aggregates(aggstate);
                }
                else
@@ -2688,7 +2681,9 @@ agg_refill_hash_table(AggState *aggstate)
                                                                   ngroups_estimate, aggstate->hashentrysize);
                        }
                        /* no memory for a new group, spill */
-                       hashagg_spill_tuple(aggstate, &spill, slot, hash);
+                       hashagg_spill_tuple(aggstate, &spill, spillslot, hash);
+
+                       aggstate->hash_pergroup[batch->setno] = NULL;
                }
 
                /*
index 620414a1edcf9afb5a6edbc44df0dbea4312188a..046242682f017f48a4571a78e74205e86d6a9ecd 100644 (file)
@@ -94,7 +94,7 @@ ExecRecursiveUnion(PlanState *pstate)
                        if (plan->numCols > 0)
                        {
                                /* Find or build hashtable entry for this tuple's group */
-                               LookupTupleHashEntry(node->hashtable, slot, &isnew);
+                               LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
                                /* Must reset temp context after each hashtable lookup */
                                MemoryContextReset(node->tempContext);
                                /* Ignore tuple if already seen */
@@ -141,7 +141,7 @@ ExecRecursiveUnion(PlanState *pstate)
                if (plan->numCols > 0)
                {
                        /* Find or build hashtable entry for this tuple's group */
-                       LookupTupleHashEntry(node->hashtable, slot, &isnew);
+                       LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
                        /* Must reset temp context after each hashtable lookup */
                        MemoryContextReset(node->tempContext);
                        /* Ignore tuple if already seen */
index bfd148a41a2416cea719da8157eed0e6b98b4ed9..8d4ccff19cc6222403bd3699f238c193af8d13d8 100644 (file)
@@ -381,7 +381,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
                        /* Find or build hashtable entry for this tuple's group */
                        entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-                                                                                &isnew);
+                                                                                &isnew, NULL);
 
                        /* If new tuple group, initialize counts */
                        if (isnew)
@@ -402,7 +402,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
                        /* For tuples not seen previously, do not make hashtable entry */
                        entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
-                                                                                NULL);
+                                                                                NULL, NULL);
 
                        /* Advance the counts if entry is already present */
                        if (entry)
index 298b7757f57ccf1c4ec3645223e9dab28e0b87ee..38c2fc0b50b66e31619aafa37d05c44caa4d3b0a 100644 (file)
@@ -595,12 +595,12 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                 */
                if (slotNoNulls(slot))
                {
-                       (void) LookupTupleHashEntry(node->hashtable, slot, &isnew);
+                       (void) LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL);
                        node->havehashrows = true;
                }
                else if (node->hashnulls)
                {
-                       (void) LookupTupleHashEntry(node->hashnulls, slot, &isnew);
+                       (void) LookupTupleHashEntry(node->hashnulls, slot, &isnew, NULL);
                        node->havenullrows = true;
                }
 
index c7deeac662f6aa17c7cf42bccb605e01d11df412..415e117407c9688e9ca9422efff9aaae221255ca 100644 (file)
@@ -139,7 +139,7 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
                                                                                         MemoryContext tempcxt, bool use_variable_hash_iv);
 extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
                                                                                   TupleTableSlot *slot,
-                                                                                  bool *isnew);
+                                                                                  bool *isnew, uint32 *hash);
 extern uint32 TupleHashTableHash(TupleHashTable hashtable,
                                                                 TupleTableSlot *slot);
 extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,