Improve dynahash.c's API so that caller can specify the comparison function
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)
as well as the hash function (formerly the comparison function was hardwired
as memcmp()).  This makes it possible to eliminate the special-purpose
hashtable management code in execGrouping.c in favor of using dynahash to
manage tuple hashtables; which is a win because dynahash knows how to expand
a hashtable when the original size estimate was too small, whereas the
special-purpose code was too stupid to do that.  (See recent gripe from
Stephan Szabo about poor performance when hash table size estimate is way
off.)  Free side benefit: when using string_hash, the default comparison
function is now strncmp() instead of memcmp().  This should eliminate some
part of the overhead associated with larger NAMEDATALEN values.

src/backend/executor/execGrouping.c
src/backend/executor/nodeAgg.c
src/backend/executor/nodeSubplan.c
src/backend/utils/hash/dynahash.c
src/backend/utils/hash/hashfn.c
src/include/executor/executor.h
src/include/nodes/execnodes.h
src/include/utils/hsearch.h
src/test/regress/expected/polymorphism.out

index 100e7a1c375caa47a6b724a518bc872c3b5e83fc..d293bb7ff29a2e107f68f8ba98b6ec5c04710463 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.7 2003/08/08 21:41:34 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.8 2003/08/19 01:13:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "utils/syscache.h"
 
 
+static TupleHashTable CurTupleHashTable = NULL;
+
+static uint32 TupleHashTableHash(const void *key, Size keysize);
+static int     TupleHashTableMatch(const void *key1, const void *key2,
+                                                               Size keysize);
+
+
 /*****************************************************************************
  *             Utility routines for grouping tuples together
  *****************************************************************************/
@@ -272,7 +279,7 @@ execTuplesHashPrepare(TupleDesc tupdesc,
  *     numCols, keyColIdx: identify the tuple fields to use as lookup key
  *     eqfunctions: equality comparison functions to use
  *     hashfunctions: datatype-specific hashing functions to use
- *     nbuckets: number of buckets to make
+ *     nbuckets: initial estimate of hashtable size
  *     entrysize: size of each entry (at least sizeof(TupleHashEntryData))
  *     tablecxt: memory context in which to store table and table entries
  *     tempcxt: short-lived context for evaluation hash and comparison functions
@@ -290,14 +297,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
                                        MemoryContext tablecxt, MemoryContext tempcxt)
 {
        TupleHashTable hashtable;
-       Size            tabsize;
+       HASHCTL         hash_ctl;
 
        Assert(nbuckets > 0);
        Assert(entrysize >= sizeof(TupleHashEntryData));
 
-       tabsize = sizeof(TupleHashTableData) +
-               (nbuckets - 1) *sizeof(TupleHashEntry);
-       hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
+       hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
+                                                                                               sizeof(TupleHashTableData));
 
        hashtable->numCols = numCols;
        hashtable->keyColIdx = keyColIdx;
@@ -306,7 +312,20 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
        hashtable->tablecxt = tablecxt;
        hashtable->tempcxt = tempcxt;
        hashtable->entrysize = entrysize;
-       hashtable->nbuckets = nbuckets;
+
+       MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+       hash_ctl.keysize = sizeof(TupleHashEntryData);
+       hash_ctl.entrysize = entrysize;
+       hash_ctl.hash = TupleHashTableHash;
+       hash_ctl.match = TupleHashTableMatch;
+       hash_ctl.hcxt = tablecxt;
+       hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
+                                                                        &hash_ctl,
+                                       HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+       if (hashtable->hashtab == NULL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
 
        return hashtable;
 }
@@ -327,19 +346,93 @@ TupleHashEntry
 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
                                         bool *isnew)
 {
-       int                     numCols = hashtable->numCols;
-       AttrNumber *keyColIdx = hashtable->keyColIdx;
        HeapTuple       tuple = slot->val;
        TupleDesc       tupdesc = slot->ttc_tupleDescriptor;
-       uint32          hashkey = 0;
-       int                     i;
-       int                     bucketno;
        TupleHashEntry entry;
        MemoryContext oldContext;
+       TupleHashTable saveCurHT;
+       bool            found;
 
-       /* Need to run the hash function in short-lived context */
+       /* Need to run the hash functions in short-lived context */
        oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
 
+       /*
+        * Set up data needed by hash and match functions
+        *
+        * We save and restore CurTupleHashTable just in case someone manages
+        * to invoke this code re-entrantly.
+        */
+       hashtable->tupdesc = tupdesc;
+       saveCurHT = CurTupleHashTable;
+       CurTupleHashTable = hashtable;
+
+       /* Search the hash table */
+       entry = (TupleHashEntry) hash_search(hashtable->hashtab,
+                                                                                &tuple,
+                                                                                isnew ? HASH_ENTER : HASH_FIND,
+                                                                                &found);
+
+       if (isnew)
+       {
+               if (found)
+               {
+                       /* found pre-existing entry */
+                       *isnew = false;
+               }
+               else
+               {
+                       /* created new entry ... we hope */
+                       if (entry == NULL)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                                errmsg("out of memory")));
+
+                       /*
+                        * Zero any caller-requested space in the entry.  (This zaps
+                        * the "key data" dynahash.c copied into the new entry, but
+                        * we don't care since we're about to overwrite it anyway.)
+                        */
+                       MemSet(entry, 0, hashtable->entrysize);
+
+                       /* Copy the first tuple into the table context */
+                       MemoryContextSwitchTo(hashtable->tablecxt);
+                       entry->firstTuple = heap_copytuple(tuple);
+
+                       *isnew = true;
+               }
+       }
+
+       CurTupleHashTable = saveCurHT;
+
+       MemoryContextSwitchTo(oldContext);
+
+       return entry;
+}
+
+/*
+ * Compute the hash value for a tuple
+ *
+ * The passed-in key is a pointer to a HeapTuple pointer -- this is either
+ * the firstTuple field of a TupleHashEntry struct, or the key value passed
+ * to hash_search.  We ignore the keysize.
+ *
+ * CurTupleHashTable must be set before calling this, since dynahash.c
+ * doesn't provide any API that would let us get at the hashtable otherwise.
+ *
+ * Also, the caller must select an appropriate memory context for running
+ * the hash functions.  (dynahash.c doesn't change CurrentMemoryContext.)
+ */
+static uint32
+TupleHashTableHash(const void *key, Size keysize)
+{
+       HeapTuple       tuple = *(const HeapTuple *) key;
+       TupleHashTable hashtable = CurTupleHashTable;
+       int                     numCols = hashtable->numCols;
+       AttrNumber *keyColIdx = hashtable->keyColIdx;
+       TupleDesc       tupdesc = hashtable->tupdesc;
+       uint32          hashkey = 0;
+       int                     i;
+
        for (i = 0; i < numCols; i++)
        {
                AttrNumber      att = keyColIdx[i];
@@ -360,72 +453,36 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
                        hashkey ^= hkey;
                }
        }
-       bucketno = hashkey % (uint32) hashtable->nbuckets;
-
-       for (entry = hashtable->buckets[bucketno];
-                entry != NULL;
-                entry = entry->next)
-       {
-               /* Quick check using hashkey */
-               if (entry->hashkey != hashkey)
-                       continue;
-               if (execTuplesMatch(entry->firstTuple,
-                                                       tuple,
-                                                       tupdesc,
-                                                       numCols, keyColIdx,
-                                                       hashtable->eqfunctions,
-                                                       hashtable->tempcxt))
-               {
-                       if (isnew)
-                               *isnew = false;
-                       MemoryContextSwitchTo(oldContext);
-                       return entry;
-               }
-       }
-
-       /* Not there, so build a new one if requested */
-       if (isnew)
-       {
-               MemoryContextSwitchTo(hashtable->tablecxt);
-
-               entry = (TupleHashEntry) palloc0(hashtable->entrysize);
-
-               entry->hashkey = hashkey;
-               entry->firstTuple = heap_copytuple(tuple);
-
-               entry->next = hashtable->buckets[bucketno];
-               hashtable->buckets[bucketno] = entry;
-
-               *isnew = true;
-       }
-
-       MemoryContextSwitchTo(oldContext);
 
-       return entry;
+       return hashkey;
 }
 
 /*
- * Walk through all the entries of a hash table, in no special order.
- * Returns NULL when no more entries remain.
+ * See whether two tuples (presumably of the same hash value) match
+ *
+ * As above, the passed pointers are pointers to HeapTuple pointers.
  *
- * Iterator state must be initialized with ResetTupleHashIterator() macro.
+ * CurTupleHashTable must be set before calling this, since dynahash.c
+ * doesn't provide any API that would let us get at the hashtable otherwise.
+ *
+ * Also, the caller must select an appropriate memory context for running
+ * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
  */
-TupleHashEntry
-ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
+static int
+TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
 {
-       TupleHashEntry entry;
-
-       entry = state->next_entry;
-       while (entry == NULL)
-       {
-               if (state->next_bucket >= hashtable->nbuckets)
-               {
-                       /* No more entries in hashtable, so done */
-                       return NULL;
-               }
-               entry = hashtable->buckets[state->next_bucket++];
-       }
-       state->next_entry = entry->next;
-
-       return entry;
+       HeapTuple       tuple1 = *(const HeapTuple *) key1;
+       HeapTuple       tuple2 = *(const HeapTuple *) key2;
+       TupleHashTable hashtable = CurTupleHashTable;
+
+       if (execTuplesMatch(tuple1,
+                                               tuple2,
+                                               hashtable->tupdesc,
+                                               hashtable->numCols,
+                                               hashtable->keyColIdx,
+                                               hashtable->eqfunctions,
+                                               hashtable->tempcxt))
+               return 0;
+       else
+               return 1;
 }
index d8fb9a9565da6919d8a9adf07ebbc3d5fe564da9..d9adb09dafbe05ae10adf618cb63a969c478c667 100644 (file)
@@ -45,7 +45,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.115 2003/08/08 21:41:41 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.116 2003/08/19 01:13:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -905,7 +905,7 @@ agg_fill_hash_table(AggState *aggstate)
 
        aggstate->table_filled = true;
        /* Initialize to walk the hash table */
-       ResetTupleHashIterator(&aggstate->hashiter);
+       ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
 }
 
 /*
@@ -920,7 +920,6 @@ agg_retrieve_hash_table(AggState *aggstate)
        bool       *aggnulls;
        AggStatePerAgg peragg;
        AggStatePerGroup pergroup;
-       TupleHashTable hashtable;
        AggHashEntry entry;
        TupleTableSlot *firstSlot;
        TupleTableSlot *resultSlot;
@@ -935,7 +934,6 @@ agg_retrieve_hash_table(AggState *aggstate)
        aggnulls = econtext->ecxt_aggnulls;
        projInfo = aggstate->ss.ps.ps_ProjInfo;
        peragg = aggstate->peragg;
-       hashtable = aggstate->hashtable;
        firstSlot = aggstate->ss.ss_ScanTupleSlot;
 
        /*
@@ -950,8 +948,7 @@ agg_retrieve_hash_table(AggState *aggstate)
                /*
                 * Find the next entry in the hash table
                 */
-               entry = (AggHashEntry) ScanTupleHashTable(hashtable,
-                                                                                                 &aggstate->hashiter);
+               entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
                if (entry == NULL)
                {
                        /* No more entries in hashtable, so done */
@@ -1440,7 +1437,7 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
                 */
                if (((PlanState *) node)->lefttree->chgParam == NULL)
                {
-                       ResetTupleHashIterator(&node->hashiter);
+                       ResetTupleHashIterator(node->hashtable, &node->hashiter);
                        return;
                }
        }
index 7530be263f32b0d286e8bfb5f9a43d368ae6550c..23b0cd3bf3d6c9499698cadfeea1b78dc33ab2c3 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.54 2003/08/08 21:41:42 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.55 2003/08/19 01:13:40 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -627,8 +627,8 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
        TupleHashIterator hashiter;
        TupleHashEntry entry;
 
-       ResetTupleHashIterator(&hashiter);
-       while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
+       ResetTupleHashIterator(hashtable, &hashiter);
+       while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
        {
                if (!execTuplesUnequal(entry->firstTuple,
                                                           tuple,
index 7090d000587c7fb704106f023bddedc4b405db51..c6f9b0236975f153b3cdea846313e73161d6a1e2 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.47 2003/08/04 02:40:06 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.48 2003/08/19 01:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -44,7 +44,6 @@
 
 #include "postgres.h"
 
-
 #include "utils/dynahash.h"
 #include "utils/hsearch.h"
 #include "utils/memutils.h"
@@ -63,7 +62,6 @@
  * Private function prototypes
  */
 static void *DynaHashAlloc(Size size);
-static uint32 call_hash(HTAB *hashp, void *k);
 static HASHSEGMENT seg_alloc(HTAB *hashp);
 static bool element_alloc(HTAB *hashp);
 static bool dir_realloc(HTAB *hashp);
@@ -133,6 +131,19 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
        else
                hashp->hash = string_hash;              /* default hash function */
 
+       /*
+        * If you don't specify a match function, it defaults to strncmp() if
+        * you used string_hash (either explicitly or by default) and to
+        * memcmp() otherwise.  (Prior to PostgreSQL 7.4, memcmp() was always
+        * used.)
+        */
+       if (flags & HASH_COMPARE)
+               hashp->match = info->match;
+       else if (hashp->hash == string_hash)
+               hashp->match = (HashCompareFunc) strncmp;
+       else
+               hashp->match = memcmp;
+
        if (flags & HASH_SHARED_MEM)
        {
                /*
@@ -155,7 +166,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
                hashp->hctl = NULL;
                hashp->dir = NULL;
                hashp->alloc = MEM_ALLOC;
-               hashp->hcxt = DynaHashCxt;
+               hashp->hcxt = CurrentDynaHashCxt;
                hashp->isshared = false;
        }
 
@@ -207,26 +218,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
                hashp->alloc = info->alloc;
        else
        {
-               if (flags & HASH_CONTEXT)
-               {
-                       /* hash table structures live in child of given context */
-                       CurrentDynaHashCxt = AllocSetContextCreate(info->hcxt,
-                                                                                                          "DynaHashTable",
-                                                                                               ALLOCSET_DEFAULT_MINSIZE,
-                                                                                          ALLOCSET_DEFAULT_INITSIZE,
-                                                                                          ALLOCSET_DEFAULT_MAXSIZE);
-                       hashp->hcxt = CurrentDynaHashCxt;
-               }
-               else
-               {
-                       /* hash table structures live in child of DynaHashCxt */
-                       CurrentDynaHashCxt = AllocSetContextCreate(DynaHashCxt,
-                                                                                                          "DynaHashTable",
-                                                                                               ALLOCSET_DEFAULT_MINSIZE,
-                                                                                          ALLOCSET_DEFAULT_INITSIZE,
-                                                                                          ALLOCSET_DEFAULT_MAXSIZE);
-                       hashp->hcxt = CurrentDynaHashCxt;
-               }
+               /* remaining hash table structures live in child of given context */
+               hashp->hcxt = AllocSetContextCreate(CurrentDynaHashCxt,
+                                                                                       "DynaHashTable",
+                                                                                       ALLOCSET_DEFAULT_MINSIZE,
+                                                                                       ALLOCSET_DEFAULT_INITSIZE,
+                                                                                       ALLOCSET_DEFAULT_MAXSIZE);
+               CurrentDynaHashCxt = hashp->hcxt;
        }
 
        if (!init_htab(hashp, nelem))
@@ -351,7 +349,7 @@ init_htab(HTAB *hashp, long nelem)
  * NB: assumes that all hash structure parameters have default values!
  */
 long
-hash_estimate_size(long num_entries, long entrysize)
+hash_estimate_size(long num_entries, Size entrysize)
 {
        long            size = 0;
        long            nBuckets,
@@ -447,7 +445,6 @@ void
 hash_stats(const char *where, HTAB *hashp)
 {
 #if HASH_STATISTICS
-
        fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
                        where, hashp->hctl->accesses, hashp->hctl->collisions);
 
@@ -459,19 +456,16 @@ hash_stats(const char *where, HTAB *hashp)
        fprintf(stderr, "hash_stats: total expansions %ld\n",
                        hash_expansions);
 #endif
-
 }
 
 /*******************************SEARCH ROUTINES *****************************/
 
-static uint32
-call_hash(HTAB *hashp, void *k)
-{
-       HASHHDR    *hctl = hashp->hctl;
-       uint32          hash_val,
-                               bucket;
 
-       hash_val = hashp->hash(k, (int) hctl->keysize);
+/* Convert a hash value to a bucket number */
+static inline uint32
+calc_bucket(HASHHDR *hctl, uint32 hash_val)
+{
+       uint32          bucket;
 
        bucket = hash_val & hctl->high_mask;
        if (bucket > hctl->max_bucket)
@@ -506,11 +500,12 @@ call_hash(HTAB *hashp, void *k)
  */
 void *
 hash_search(HTAB *hashp,
-                       void *keyPtr,
+                       const void *keyPtr,
                        HASHACTION action,
                        bool *foundPtr)
 {
        HASHHDR    *hctl = hashp->hctl;
+       uint32          hashvalue = 0;
        uint32          bucket;
        long            segment_num;
        long            segment_ndx;
@@ -545,7 +540,12 @@ hash_search(HTAB *hashp,
        }
        else
        {
-               bucket = call_hash(hashp, keyPtr);
+               HashCompareFunc match;
+               Size            keysize = hctl->keysize;
+
+               hashvalue = hashp->hash(keyPtr, keysize);
+               bucket = calc_bucket(hctl, hashvalue);
+
                segment_num = bucket >> hctl->sshift;
                segment_ndx = MOD(bucket, hctl->ssize);
 
@@ -560,9 +560,11 @@ hash_search(HTAB *hashp,
                /*
                 * Follow collision chain looking for matching key
                 */
+               match = hashp->match;   /* save one fetch in inner loop */
                while (currBucket != NULL)
                {
-                       if (memcmp(ELEMENTKEY(currBucket), keyPtr, hctl->keysize) == 0)
+                       if (currBucket->hashvalue == hashvalue &&
+                               match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
                                break;
                        prevBucketPtr = &(currBucket->link);
                        currBucket = *prevBucketPtr;
@@ -641,6 +643,7 @@ hash_search(HTAB *hashp,
                        currBucket->link = NULL;
 
                        /* copy key into record */
+                       currBucket->hashvalue = hashvalue;
                        memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize);
 
                        /* caller is expected to fill the data field on return */
@@ -802,7 +805,7 @@ expand_table(HTAB *hashp)
 
        /*
         * Relocate records to the new bucket.  NOTE: because of the way the
-        * hash masking is done in call_hash, only one old bucket can need to
+        * hash masking is done in calc_bucket, only one old bucket can need to
         * be split at this point.      With a different way of reducing the hash
         * value, that might not be true!
         */
@@ -820,8 +823,7 @@ expand_table(HTAB *hashp)
                 currElement = nextElement)
        {
                nextElement = currElement->link;
-               if ((long) call_hash(hashp, (void *) ELEMENTKEY(currElement))
-                       == old_bucket)
+               if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
                {
                        *oldlink = currElement;
                        oldlink = &currElement->link;
index 835bd007a9746a0ea6ea689f42a1f4778f9a8fd1..3f7a0089075fa1243afe30500dc33940e9d1d211 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.18 2003/08/04 02:40:06 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.19 2003/08/19 01:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 /*
  * string_hash: hash function for keys that are null-terminated strings.
  *
- * NOTE: since dynahash.c backs this up with a fixed-length memcmp(),
- * the key must actually be zero-padded to the specified maximum length
- * to work correctly.  However, if it is known that nothing after the
- * first zero byte is interesting, this is the right hash function to use.
- *
  * NOTE: this is the default hash function if none is specified.
  */
 uint32
-string_hash(void *key, int keysize)
+string_hash(const void *key, Size keysize)
 {
-       return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key)));
+       return DatumGetUInt32(hash_any((const unsigned char *) key,
+                                                                  (int) strlen((const char *) key)));
 }
 
 /*
  * tag_hash: hash function for fixed-size tag values
  */
 uint32
-tag_hash(void *key, int keysize)
+tag_hash(const void *key, Size keysize)
 {
-       return DatumGetUInt32(hash_any((unsigned char *) key, keysize));
+       return DatumGetUInt32(hash_any((const unsigned char *) key,
+                                                                  (int) keysize));
 }
index af2f123d2d6f329e30cf9c1895e2a53227dd9095..88449034feec56d90ff0c0294c8443fab3c9b6e8 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: executor.h,v 1.99 2003/08/08 21:42:44 momjian Exp $
+ * $Id: executor.h,v 1.100 2003/08/19 01:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -71,8 +71,6 @@ extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
                                         TupleTableSlot *slot,
                                         bool *isnew);
-extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable,
-                                  TupleHashIterator *state);
 
 /*
  * prototypes from functions in execJunk.c
index 3f163b8fdaa9aafa574638c4293c4fb3fe59f7fc..8d180009bfd97cb3f2ce34a123fee8de312f9934 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: execnodes.h,v 1.103 2003/08/08 21:42:47 momjian Exp $
+ * $Id: execnodes.h,v 1.104 2003/08/19 01:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,6 +21,7 @@
 #include "nodes/bitmapset.h"
 #include "nodes/params.h"
 #include "nodes/plannodes.h"
+#include "utils/hsearch.h"
 #include "utils/tuplestore.h"
 
 
@@ -344,14 +345,14 @@ typedef struct TupleHashTableData *TupleHashTable;
 
 typedef struct TupleHashEntryData
 {
-       TupleHashEntry next;            /* next entry in same hash bucket */
-       uint32          hashkey;                /* exact hash key of this entry */
+       /* firstTuple must be the first field in this struct! */
        HeapTuple       firstTuple;             /* copy of first tuple in this group */
        /* there may be additional data beyond the end of this struct */
 } TupleHashEntryData;                  /* VARIABLE LENGTH STRUCT */
 
 typedef struct TupleHashTableData
 {
+       HTAB       *hashtab;            /* underlying dynahash table */
        int                     numCols;                /* number of columns in lookup key */
        AttrNumber *keyColIdx;          /* attr numbers of key columns */
        FmgrInfo   *eqfunctions;        /* lookup data for comparison functions */
@@ -359,19 +360,15 @@ typedef struct TupleHashTableData
        MemoryContext tablecxt;         /* memory context containing table */
        MemoryContext tempcxt;          /* context for function evaluations */
        Size            entrysize;              /* actual size to make each hash entry */
-       int                     nbuckets;               /* number of buckets in hash table */
-       TupleHashEntry buckets[1];      /* VARIABLE LENGTH ARRAY */
-} TupleHashTableData;                  /* VARIABLE LENGTH STRUCT */
+       TupleDesc       tupdesc;                /* tuple descriptor */
+} TupleHashTableData;
 
-typedef struct
-{
-       TupleHashEntry next_entry;      /* next entry in current chain */
-       int                     next_bucket;    /* next chain */
-} TupleHashIterator;
+typedef HASH_SEQ_STATUS TupleHashIterator;
 
-#define ResetTupleHashIterator(iter) \
-       ((iter)->next_entry = NULL, \
-        (iter)->next_bucket = 0)
+#define ResetTupleHashIterator(htable, iter) \
+       hash_seq_init(iter, (htable)->hashtab)
+#define ScanTupleHashTable(iter) \
+       ((TupleHashEntry) hash_seq_search(iter))
 
 
 /* ----------------------------------------------------------------
index 905268badc6199e7cee00cf20bf93553daa4bf81..05d26e9a15092b95965fe1fe774a0bdd0812b8a8 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: hsearch.h,v 1.28 2003/08/04 02:40:15 momjian Exp $
+ * $Id: hsearch.h,v 1.29 2003/08/19 01:13:41 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #define HSEARCH_H
 
 
+/*
+ * Hash and comparison functions must have these signatures.  Comparison
+ * functions return zero for match, nonzero for no match.  (The comparison
+ * function definition is designed to allow memcmp() and strncmp() to be
+ * used directly as key comparison functions.)
+ */
+typedef uint32 (*HashValueFunc) (const void *key, Size keysize);
+typedef int (*HashCompareFunc) (const void *key1, const void *key2,
+                                                               Size keysize);
+
+/*
+ * Space allocation function for a hashtable --- designed to match malloc().
+ * Note: there is no free function API; can't destroy a hashtable unless you
+ * use the default allocator.
+ */
+typedef void *(*HashAllocFunc) (Size request);
+
 /*
  * Constants
  *
@@ -44,6 +61,7 @@
 typedef struct HASHELEMENT
 {
        struct HASHELEMENT *link;       /* link to next entry in same bucket */
+       uint32  hashvalue;                      /* hash function result for this entry */
 } HASHELEMENT;
 
 /* A hash bucket is a linked list of HASHELEMENTs */
@@ -64,8 +82,8 @@ typedef struct HASHHDR
        long            ffactor;                /* Fill factor */
        long            nentries;               /* Number of entries in hash table */
        long            nsegs;                  /* Number of allocated segments */
-       long            keysize;                /* hash key length in bytes */
-       long            entrysize;              /* total user element size in bytes */
+       Size            keysize;                /* hash key length in bytes */
+       Size            entrysize;              /* total user element size in bytes */
        long            max_dsize;              /* 'dsize' limit if directory is fixed
                                                                 * size */
        HASHELEMENT *freeList;          /* linked list of free elements */
@@ -83,8 +101,9 @@ typedef struct HTAB
 {
        HASHHDR    *hctl;                       /* shared control information */
        HASHSEGMENT *dir;                       /* directory of segment starts */
-       uint32          (*hash) (void *key, int keysize);               /* Hash Function */
-       void       *(*alloc) (Size);    /* memory allocator */
+       HashValueFunc hash;                     /* hash function */
+       HashCompareFunc match;          /* key comparison function */
+       HashAllocFunc alloc;            /* memory allocator */
        MemoryContext hcxt;                     /* memory context if default allocator
                                                                 * used */
        char       *tabname;            /* table name (for error messages) */
@@ -97,28 +116,30 @@ typedef struct HASHCTL
 {
        long            ssize;                  /* Segment Size */
        long            dsize;                  /* (initial) Directory Size */
-       long            ffactor;                /* Fill factor */
-       uint32          (*hash) (void *key, int keysize);               /* Hash Function */
-       long            keysize;                /* hash key length in bytes */
-       long            entrysize;              /* total user element size in bytes */
        long            max_dsize;              /* limit to dsize if directory size is
                                                                 * limited */
-       void       *(*alloc) (Size);    /* memory allocation function */
+       long            ffactor;                /* Fill factor */
+       Size            keysize;                /* hash key length in bytes */
+       Size            entrysize;              /* total user element size in bytes */
+       HashValueFunc hash;                     /* hash function */
+       HashCompareFunc match;          /* key comparison function */
+       HashAllocFunc alloc;            /* memory allocator */
        HASHSEGMENT *dir;                       /* directory of segment starts */
        HASHHDR    *hctl;                       /* location of header in shared mem */
        MemoryContext hcxt;                     /* memory context to use for allocations */
 } HASHCTL;
 
 /* Flags to indicate which parameters are supplied */
-#define HASH_SEGMENT   0x002   /* Setting segment size */
-#define HASH_DIRSIZE   0x004   /* Setting directory size */
-#define HASH_FFACTOR   0x008   /* Setting fill factor */
+#define HASH_SEGMENT   0x002   /* Set segment size */
+#define HASH_DIRSIZE   0x004   /* Set directory size */
+#define HASH_FFACTOR   0x008   /* Set fill factor */
 #define HASH_FUNCTION  0x010   /* Set user defined hash function */
-#define HASH_ELEM              0x020   /* Setting key/entry size */
-#define HASH_SHARED_MEM 0x040  /* Setting shared mem const */
+#define HASH_ELEM              0x020   /* Set key/entry size */
+#define HASH_SHARED_MEM 0x040  /* Set shared mem const */
 #define HASH_ATTACH            0x080   /* Do not initialize hctl */
-#define HASH_ALLOC             0x100   /* Setting memory allocator */
-#define HASH_CONTEXT   0x200   /* Setting explicit memory context */
+#define HASH_ALLOC             0x100   /* Set memory allocator */
+#define HASH_CONTEXT   0x200   /* Set explicit memory context */
+#define HASH_COMPARE   0x400   /* Set user defined comparison function */
 
 
 /* max_dsize value to indicate expansible directory */
@@ -151,17 +172,17 @@ extern HTAB *hash_create(const char *tabname, long nelem,
                        HASHCTL *info, int flags);
 extern void hash_destroy(HTAB *hashp);
 extern void hash_stats(const char *where, HTAB *hashp);
-extern void *hash_search(HTAB *hashp, void *keyPtr, HASHACTION action,
+extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
                        bool *foundPtr);
 extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
 extern void *hash_seq_search(HASH_SEQ_STATUS *status);
-extern long hash_estimate_size(long num_entries, long entrysize);
+extern long hash_estimate_size(long num_entries, Size entrysize);
 extern long hash_select_dirsize(long num_entries);
 
 /*
  * prototypes for functions in hashfn.c
  */
-extern uint32 string_hash(void *key, int keysize);
-extern uint32 tag_hash(void *key, int keysize);
+extern uint32 string_hash(const void *key, Size keysize);
+extern uint32 tag_hash(const void *key, Size keysize);
 
 #endif   /* HSEARCH_H */
index 04b52738e6e9236b14b30c975463f7958fef3d5f..bc6b9e4d85d9ba425df987a24a2507a6138d05ce 100644 (file)
@@ -350,183 +350,183 @@ select f3, myaggp01a(*) from t group by f3;
  f3 | myaggp01a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggp03a(*) from t group by f3;
  f3 | myaggp03a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggp03b(*) from t group by f3;
  f3 | myaggp03b 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggp05a(f1) from t group by f3;
  f3 | myaggp05a 
 ----+-----------
  b  | {1,2,3}
- a  | {1,2,3}
  c  | {1,2}
+ a  | {1,2,3}
 (3 rows)
 
 select f3, myaggp06a(f1) from t group by f3;
  f3 | myaggp06a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggp08a(f1) from t group by f3;
  f3 | myaggp08a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggp09a(f1) from t group by f3;
  f3 | myaggp09a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggp09b(f1) from t group by f3;
  f3 | myaggp09b 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggp10a(f1) from t group by f3;
  f3 | myaggp10a 
 ----+-----------
  b  | {1,2,3}
- a  | {1,2,3}
  c  | {1,2}
+ a  | {1,2,3}
 (3 rows)
 
 select f3, myaggp10b(f1) from t group by f3;
  f3 | myaggp10b 
 ----+-----------
  b  | {1,2,3}
- a  | {1,2,3}
  c  | {1,2}
+ a  | {1,2,3}
 (3 rows)
 
 select f3, myaggp20a(f1) from t group by f3;
  f3 | myaggp20a 
 ----+-----------
  b  | {1,2,3}
- a  | {1,2,3}
  c  | {1,2}
+ a  | {1,2,3}
 (3 rows)
 
 select f3, myaggp20b(f1) from t group by f3;
  f3 | myaggp20b 
 ----+-----------
  b  | {1,2,3}
- a  | {1,2,3}
  c  | {1,2}
+ a  | {1,2,3}
 (3 rows)
 
 select f3, myaggn01a(*) from t group by f3;
  f3 | myaggn01a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggn01b(*) from t group by f3;
  f3 | myaggn01b 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggn03a(*) from t group by f3;
  f3 | myaggn03a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggn05a(f1) from t group by f3;
  f3 | myaggn05a 
 ----+-----------
  b  | {1,2,3}
- a  | {1,2,3}
  c  | {1,2}
+ a  | {1,2,3}
 (3 rows)
 
 select f3, myaggn05b(f1) from t group by f3;
  f3 | myaggn05b 
 ----+-----------
  b  | {1,2,3}
- a  | {1,2,3}
  c  | {1,2}
+ a  | {1,2,3}
 (3 rows)
 
 select f3, myaggn06a(f1) from t group by f3;
  f3 | myaggn06a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggn06b(f1) from t group by f3;
  f3 | myaggn06b 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggn08a(f1) from t group by f3;
  f3 | myaggn08a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggn08b(f1) from t group by f3;
  f3 | myaggn08b 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggn09a(f1) from t group by f3;
  f3 | myaggn09a 
 ----+-----------
  b  | {}
- a  | {}
  c  | {}
+ a  | {}
 (3 rows)
 
 select f3, myaggn10a(f1) from t group by f3;
  f3 | myaggn10a 
 ----+-----------
  b  | {1,2,3}
- a  | {1,2,3}
  c  | {1,2}
+ a  | {1,2,3}
 (3 rows)