From e0ece2a981ee9068f50c4423e303836c2585eb02 Mon Sep 17 00:00:00 2001
From: Jeff Davis
Date: Fri, 10 Jan 2025 17:14:37 -0800
Subject: [PATCH] TupleHashTable: store additional data along with tuple.

Previously, the caller needed to allocate the memory and the
TupleHashTable would store a pointer to it. That wastes space for the
palloc overhead as well as the size of the pointer itself.

Now, the TupleHashTable relies on the caller to correctly specify the
additionalsize, and allocates that amount of space. The caller can
then request a pointer into that space.

Discussion: https://postgr.es/m/b9cbf0219a9859dc8d240311643ff4362fd9602c.camel@j-davis.com
Reviewed-by: Heikki Linnakangas
---
 src/backend/executor/execGrouping.c | 57 ++++++++++++++++++++++++++++-
 src/backend/executor/nodeAgg.c      | 20 ++++------
 src/backend/executor/nodeSetOp.c    | 17 +++++----
 src/backend/executor/nodeSubplan.c  |  2 +-
 src/include/executor/executor.h     |  3 ++
 src/include/nodes/execnodes.h       | 10 +----
 6 files changed, 78 insertions(+), 31 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 33b124fbb0..40f1776a7b 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -20,6 +20,13 @@
 #include "miscadmin.h"
 #include "utils/lsyscache.h"
 
+typedef struct TupleHashEntryData
+{
+	MinimalTuple firstTuple;	/* copy of first tuple in this group */
+	uint32		status;			/* hash status */
+	uint32		hash;			/* hash value (cached) */
+} TupleHashEntryData;
+
 static int	TupleHashTableMatch(struct tuplehash_hash *tb,
 								const MinimalTuple tuple1,
 								const MinimalTuple tuple2);
 static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
 												 const MinimalTuple tuple);
@@ -196,6 +203,7 @@ BuildTupleHashTable(PlanState *parent,
 	hashtable->tab_collations = collations;
 	hashtable->tablecxt = tablecxt;
 	hashtable->tempcxt = tempcxt;
+	hashtable->additionalsize = additionalsize;
 	hashtable->tableslot = NULL;	/* will be made on first lookup */
 	hashtable->inputslot = NULL;
 	hashtable->in_hash_expr = NULL;
@@ -273,6 +281,15 @@ ResetTupleHashTable(TupleHashTable hashtable)
 	tuplehash_reset(hashtable->hashtab);
 }
 
+/*
+ * Return size of the hash bucket. Useful for estimating memory usage.
+ */
+size_t
+TupleHashEntrySize(void)
+{
+	return sizeof(TupleHashEntryData);
+}
+
 /*
  * Find or create a hashtable entry for the tuple group containing the
  * given tuple. The tuple must be the same type as the hashtable entries.
@@ -339,6 +356,24 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
 	return hash;
 }
 
+MinimalTuple
+TupleHashEntryGetTuple(TupleHashEntry entry)
+{
+	return entry->firstTuple;
+}
+
+/*
+ * Get a pointer into the additional space allocated for this entry. The
+ * amount of space available is the additionalsize specified to
+ * BuildTupleHashTable(). If additionalsize was specified as zero, no
+ * additional space is available and this function should not be called.
+ */
+void *
+TupleHashEntryGetAdditional(TupleHashEntry entry)
+{
+	return (char *) entry->firstTuple + MAXALIGN(entry->firstTuple->t_len);
+}
+
 /*
  * A variant of LookupTupleHashEntry for callers that have already computed
  * the hash value.
@@ -477,13 +512,31 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
 		}
 		else
 		{
+			MinimalTuple firstTuple;
+			size_t		totalsize;	/* including alignment and additionalsize */
+
 			/* created new entry */
 			*isnew = true;
 			/* zero caller data */
-			entry->additional = NULL;
 			MemoryContextSwitchTo(hashtable->tablecxt);
+
 			/* Copy the first tuple into the table context */
-			entry->firstTuple = ExecCopySlotMinimalTuple(slot);
+			firstTuple = ExecCopySlotMinimalTuple(slot);
+
+			/*
+			 * Allocate additional space right after the MinimalTuple of size
+			 * additionalsize. The caller can get a pointer to this data with
+			 * TupleHashEntryGetAdditional(), and store arbitrary data there.
+			 *
+			 * This avoids the need to store an extra pointer or allocate an
+			 * additional chunk, which would waste memory.
+			 */
+			totalsize = MAXALIGN(firstTuple->t_len) + hashtable->additionalsize;
+			firstTuple = repalloc(firstTuple, totalsize);
+			memset((char *) firstTuple + firstTuple->t_len, 0,
+				   totalsize - firstTuple->t_len);
+
+			entry->firstTuple = firstTuple;
 		}
 	}
 	else
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 3005b5c0e3..1964075296 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1713,7 +1713,7 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
 		transitionChunkSize = 0;
 
 	return
-		sizeof(TupleHashEntryData) +
+		TupleHashEntrySize() +
 		tupleChunkSize +
 		pergroupChunkSize +
 		transitionChunkSize;
@@ -1954,7 +1954,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 	if (aggstate->hash_ngroups_current > 0)
 	{
 		aggstate->hashentrysize =
-			sizeof(TupleHashEntryData) +
+			TupleHashEntrySize() +
 			(hashkey_mem / (double) aggstate->hash_ngroups_current);
 	}
 }
@@ -2055,11 +2055,7 @@ initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
 	if (aggstate->numtrans == 0)
 		return;
 
-	pergroup = (AggStatePerGroup)
-		MemoryContextAlloc(hashtable->tablecxt,
-						   sizeof(AggStatePerGroupData) * aggstate->numtrans);
-
-	entry->additional = pergroup;
+	pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
 
 	/*
 	 * Initialize aggregates for new tuple group, lookup_hash_entries()
@@ -2123,7 +2119,7 @@ lookup_hash_entries(AggState *aggstate)
 		{
 			if (isnew)
 				initialize_hash_entry(aggstate, hashtable, entry);
-			pergroup[setno] = entry->additional;
+			pergroup[setno] = TupleHashEntryGetAdditional(entry);
 		}
 		else
 		{
@@ -2681,7 +2677,7 @@ agg_refill_hash_table(AggState *aggstate)
 		{
 			if (isnew)
 				initialize_hash_entry(aggstate, perhash->hashtable, entry);
-			aggstate->hash_pergroup[batch->setno] = entry->additional;
+			aggstate->hash_pergroup[batch->setno] = TupleHashEntryGetAdditional(entry);
 			advance_aggregates(aggstate);
 		}
 		else
@@ -2773,7 +2769,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
 	ExprContext *econtext;
 	AggStatePerAgg peragg;
 	AggStatePerGroup pergroup;
-	TupleHashEntryData *entry;
+	TupleHashEntry entry;
 	TupleTableSlot *firstSlot;
 	TupleTableSlot *result;
 	AggStatePerHash perhash;
@@ -2845,7 +2841,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
 		 * Transform representative tuple back into one with the right
 		 * columns.
 		 */
-		ExecStoreMinimalTuple(entry->firstTuple, hashslot, false);
+		ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashslot, false);
 		slot_getallattrs(hashslot);
 
 		ExecClearTuple(firstSlot);
@@ -2861,7 +2857,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
 		}
 
 		ExecStoreVirtualTuple(firstSlot);
-		pergroup = (AggStatePerGroup) entry->additional;
+		pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
 
 		/*
 		 * Use the representative input tuple for any references to
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 5b7ff9c374..2a50d56fc8 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -425,6 +425,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 	{
 		TupleTableSlot *outerslot;
 		TupleHashEntryData *entry;
+		SetOpStatePerGroup pergroup;
 		bool		isnew;
 
 		outerslot = ExecProcNode(outerPlan);
@@ -437,16 +438,16 @@ setop_fill_hash_table(SetOpState *setopstate)
 									 outerslot,
 									 &isnew, NULL);
+		pergroup = TupleHashEntryGetAdditional(entry);
 
 		/* If new tuple group, initialize counts to zero */
 		if (isnew)
 		{
-			entry->additional = (SetOpStatePerGroup)
-				MemoryContextAllocZero(setopstate->hashtable->tablecxt,
-									   sizeof(SetOpStatePerGroupData));
+			pergroup->numLeft = 0;
+			pergroup->numRight = 0;
 		}
 
 		/* Advance the counts */
-		((SetOpStatePerGroup) entry->additional)->numLeft++;
+		pergroup->numLeft++;
 
 		/* Must reset expression context after each hashtable lookup */
 		ResetExprContext(econtext);
@@ -478,7 +479,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
 		/* Advance the counts if entry is already present */
 		if (entry)
-			((SetOpStatePerGroup) entry->additional)->numRight++;
+			((SetOpStatePerGroup) TupleHashEntryGetAdditional(entry))->numRight++;
 
 		/* Must reset expression context after each hashtable lookup */
 		ResetExprContext(econtext);
@@ -496,7 +497,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 static TupleTableSlot *
 setop_retrieve_hash_table(SetOpState *setopstate)
 {
-	TupleHashEntryData *entry;
+	TupleHashEntry entry;
 	TupleTableSlot *resultTupleSlot;
 
 	/*
@@ -526,12 +527,12 @@ setop_retrieve_hash_table(SetOpState *setopstate)
 		 * See if we should emit any copies of this tuple, and if so return
 		 * the first copy.
 		 */
-		set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
+		set_output_count(setopstate, (SetOpStatePerGroup) TupleHashEntryGetAdditional(entry));
 
 		if (setopstate->numOutput > 0)
 		{
 			setopstate->numOutput--;
-			return ExecStoreMinimalTuple(entry->firstTuple,
+			return ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry),
 										 resultTupleSlot,
 										 false);
 		}
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 49767ed6a5..f7f6fc2da0 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -753,7 +753,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
 	{
 		CHECK_FOR_INTERRUPTS();
 
-		ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
+		ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashtable->tableslot, false);
 		if (!execTuplesUnequal(slot, hashtable->tableslot,
 							   numCols, keyColIdx,
 							   eqfunctions,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index f8a8d03e53..a49c32072e 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -148,9 +148,12 @@ extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
 										   bool *isnew, uint32 *hash);
 extern uint32 TupleHashTableHash(TupleHashTable hashtable,
 								 TupleTableSlot *slot);
+extern size_t TupleHashEntrySize(void);
 extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
 											   TupleTableSlot *slot,
 											   bool *isnew, uint32 hash);
+extern MinimalTuple TupleHashEntryGetTuple(TupleHashEntry entry);
+extern void *TupleHashEntryGetAdditional(TupleHashEntry entry);
 extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
 										 TupleTableSlot *slot,
 										 ExprState *eqcomp,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index b3f7aa299f..7d5871d6fa 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -806,17 +806,10 @@ typedef struct ExecAuxRowMark
  *	  point to tab_hash_expr and tab_eq_func respectively.
  * ----------------------------------------------------------------
  */
+typedef struct TupleHashEntryData TupleHashEntryData;
 typedef struct TupleHashEntryData *TupleHashEntry;
 typedef struct TupleHashTableData *TupleHashTable;
 
-typedef struct TupleHashEntryData
-{
-	MinimalTuple firstTuple;	/* copy of first tuple in this group */
-	void	   *additional;		/* user data */
-	uint32		status;			/* hash status */
-	uint32		hash;			/* hash value (cached) */
-} TupleHashEntryData;
-
 /* define parameters necessary to generate the tuple hash table interface */
 #define SH_PREFIX tuplehash
 #define SH_ELEMENT_TYPE TupleHashEntryData
@@ -835,6 +828,7 @@ typedef struct TupleHashTableData
 	Oid		   *tab_collations; /* collations for hash and comparison */
 	MemoryContext tablecxt;		/* memory context containing table */
 	MemoryContext tempcxt;		/* context for function evaluations */
+	Size		additionalsize; /* size of additional data */
 	TupleTableSlot *tableslot;	/* slot for referencing table entries */
 	/* The following fields are set transiently for each table search: */
 	TupleTableSlot *inputslot;	/* current input tuple's slot */
-- 
2.39.5
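
As an illustration (not part of the patch), the sketch below shows the calling
pattern the new interface expects, modeled on the nodeSetOp.c changes above.
MyPerGroupData and count_tuple() are hypothetical names; the hash table is
assumed to have been built by BuildTupleHashTable() with
additionalsize = sizeof(MyPerGroupData).

#include "postgres.h"

#include "executor/executor.h"

/* Hypothetical per-group state kept in the entry's additional space. */
typedef struct MyPerGroupData
{
	int64		count;			/* number of input tuples seen for this group */
} MyPerGroupData;

/*
 * Find or create the group for "slot" and advance its counter.  With this
 * patch the per-group state lives directly after the entry's MinimalTuple,
 * so no separate palloc in the table context is needed.
 */
static void
count_tuple(TupleHashTable hashtable, TupleTableSlot *slot)
{
	TupleHashEntry entry;
	MyPerGroupData *pergroup;
	bool		isnew;

	entry = LookupTupleHashEntry(hashtable, slot, &isnew, NULL);
	pergroup = (MyPerGroupData *) TupleHashEntryGetAdditional(entry);

	if (isnew)
		pergroup->count = 0;	/* additional space is zeroed on creation */
	pergroup->count++;
}

Since TupleHashEntryData is now private to execGrouping.c, callers that
previously charged sizeof(TupleHashEntryData) when estimating memory use
TupleHashEntrySize() instead, as the nodeAgg.c hunks above show.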