Improve dynahash.c's API so that caller can specify the comparison function

author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c

index 100e7a1c375caa47a6b724a518bc872c3b5e83fc..d293bb7ff29a2e107f68f8ba98b6ec5c04710463 100644 (file)
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.7 2003/08/08 21:41:34 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.8 2003/08/19 01:13:40 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -23,6 +23,13 @@
  #include "utils/syscache.h"
  
  
+static TupleHashTable CurTupleHashTable = NULL;
+
+static uint32 TupleHashTableHash(const void *key, Size keysize);
+static int     TupleHashTableMatch(const void *key1, const void *key2,
+                                                               Size keysize);
+
+
  /*****************************************************************************
   *             Utility routines for grouping tuples together
   *****************************************************************************/
@@ -272,7 +279,7 @@ execTuplesHashPrepare(TupleDesc tupdesc,
   *     numCols, keyColIdx: identify the tuple fields to use as lookup key
   *     eqfunctions: equality comparison functions to use
   *     hashfunctions: datatype-specific hashing functions to use
- *     nbuckets: number of buckets to make
+ *     nbuckets: initial estimate of hashtable size
   *     entrysize: size of each entry (at least sizeof(TupleHashEntryData))
   *     tablecxt: memory context in which to store table and table entries
   *     tempcxt: short-lived context for evaluation hash and comparison functions
@@ -290,14 +297,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
                                         MemoryContext tablecxt, MemoryContext tempcxt)
  {
         TupleHashTable hashtable;
-       Size            tabsize;
+       HASHCTL         hash_ctl;
  
         Assert(nbuckets > 0);
         Assert(entrysize >= sizeof(TupleHashEntryData));
  
-       tabsize = sizeof(TupleHashTableData) +
-               (nbuckets - 1) *sizeof(TupleHashEntry);
-       hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
+       hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
+                                                                                               sizeof(TupleHashTableData));
  
         hashtable->numCols = numCols;
         hashtable->keyColIdx = keyColIdx;
@@ -306,7 +312,20 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
         hashtable->tablecxt = tablecxt;
         hashtable->tempcxt = tempcxt;
         hashtable->entrysize = entrysize;
-       hashtable->nbuckets = nbuckets;
+
+       MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+       hash_ctl.keysize = sizeof(TupleHashEntryData);
+       hash_ctl.entrysize = entrysize;
+       hash_ctl.hash = TupleHashTableHash;
+       hash_ctl.match = TupleHashTableMatch;
+       hash_ctl.hcxt = tablecxt;
+       hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
+                                                                        &hash_ctl,
+                                       HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+       if (hashtable->hashtab == NULL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
  
         return hashtable;
  }
@@ -327,19 +346,93 @@ TupleHashEntry
  LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
                                          bool *isnew)
  {
-       int                     numCols = hashtable->numCols;
-       AttrNumber *keyColIdx = hashtable->keyColIdx;
         HeapTuple       tuple = slot->val;
         TupleDesc       tupdesc = slot->ttc_tupleDescriptor;
-       uint32          hashkey = 0;
-       int                     i;
-       int                     bucketno;
         TupleHashEntry entry;
         MemoryContext oldContext;
+       TupleHashTable saveCurHT;
+       bool            found;
  
-       /* Need to run the hash function in short-lived context */
+       /* Need to run the hash functions in short-lived context */
         oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
  
+       /*
+        * Set up data needed by hash and match functions
+        *
+        * We save and restore CurTupleHashTable just in case someone manages
+        * to invoke this code re-entrantly.
+        */
+       hashtable->tupdesc = tupdesc;
+       saveCurHT = CurTupleHashTable;
+       CurTupleHashTable = hashtable;
+
+       /* Search the hash table */
+       entry = (TupleHashEntry) hash_search(hashtable->hashtab,
+                                                                                &tuple,
+                                                                                isnew ? HASH_ENTER : HASH_FIND,
+                                                                                &found);
+
+       if (isnew)
+       {
+               if (found)
+               {
+                       /* found pre-existing entry */
+                       *isnew = false;
+               }
+               else
+               {
+                       /* created new entry ... we hope */
+                       if (entry == NULL)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                                errmsg("out of memory")));
+
+                       /*
+                        * Zero any caller-requested space in the entry.  (This zaps
+                        * the "key data" dynahash.c copied into the new entry, but
+                        * we don't care since we're about to overwrite it anyway.)
+                        */
+                       MemSet(entry, 0, hashtable->entrysize);
+
+                       /* Copy the first tuple into the table context */
+                       MemoryContextSwitchTo(hashtable->tablecxt);
+                       entry->firstTuple = heap_copytuple(tuple);
+
+                       *isnew = true;
+               }
+       }
+
+       CurTupleHashTable = saveCurHT;
+
+       MemoryContextSwitchTo(oldContext);
+
+       return entry;
+}
+
+/*
+ * Compute the hash value for a tuple
+ *
+ * The passed-in key is a pointer to a HeapTuple pointer -- this is either
+ * the firstTuple field of a TupleHashEntry struct, or the key value passed
+ * to hash_search.  We ignore the keysize.
+ *
+ * CurTupleHashTable must be set before calling this, since dynahash.c
+ * doesn't provide any API that would let us get at the hashtable otherwise.
+ *
+ * Also, the caller must select an appropriate memory context for running
+ * the hash functions.  (dynahash.c doesn't change CurrentMemoryContext.)
+ */
+static uint32
+TupleHashTableHash(const void *key, Size keysize)
+{
+       HeapTuple       tuple = *(const HeapTuple *) key;
+       TupleHashTable hashtable = CurTupleHashTable;
+       int                     numCols = hashtable->numCols;
+       AttrNumber *keyColIdx = hashtable->keyColIdx;
+       TupleDesc       tupdesc = hashtable->tupdesc;
+       uint32          hashkey = 0;
+       int                     i;
+
         for (i = 0; i < numCols; i++)
         {
                 AttrNumber      att = keyColIdx[i];
@@ -360,72 +453,36 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
                         hashkey ^= hkey;
                 }
         }
-       bucketno = hashkey % (uint32) hashtable->nbuckets;
-
-       for (entry = hashtable->buckets[bucketno];
-                entry != NULL;
-                entry = entry->next)
-       {
-               /* Quick check using hashkey */
-               if (entry->hashkey != hashkey)
-                       continue;
-               if (execTuplesMatch(entry->firstTuple,
-                                                       tuple,
-                                                       tupdesc,
-                                                       numCols, keyColIdx,
-                                                       hashtable->eqfunctions,
-                                                       hashtable->tempcxt))
-               {
-                       if (isnew)
-                               *isnew = false;
-                       MemoryContextSwitchTo(oldContext);
-                       return entry;
-               }
-       }
-
-       /* Not there, so build a new one if requested */
-       if (isnew)
-       {
-               MemoryContextSwitchTo(hashtable->tablecxt);
-
-               entry = (TupleHashEntry) palloc0(hashtable->entrysize);
-
-               entry->hashkey = hashkey;
-               entry->firstTuple = heap_copytuple(tuple);
-
-               entry->next = hashtable->buckets[bucketno];
-               hashtable->buckets[bucketno] = entry;
-
-               *isnew = true;
-       }
-
-       MemoryContextSwitchTo(oldContext);
  
-       return entry;
+       return hashkey;
  }
  
  /*
- * Walk through all the entries of a hash table, in no special order.
- * Returns NULL when no more entries remain.
+ * See whether two tuples (presumably of the same hash value) match
+ *
+ * As above, the passed pointers are pointers to HeapTuple pointers.
   *
- * Iterator state must be initialized with ResetTupleHashIterator() macro.
+ * CurTupleHashTable must be set before calling this, since dynahash.c
+ * doesn't provide any API that would let us get at the hashtable otherwise.
+ *
+ * Also, the caller must select an appropriate memory context for running
+ * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
   */
-TupleHashEntry
-ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
+static int
+TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
  {
-       TupleHashEntry entry;
-
-       entry = state->next_entry;
-       while (entry == NULL)
-       {
-               if (state->next_bucket >= hashtable->nbuckets)
-               {
-                       /* No more entries in hashtable, so done */
-                       return NULL;
-               }
-               entry = hashtable->buckets[state->next_bucket++];
-       }
-       state->next_entry = entry->next;
-
-       return entry;
+       HeapTuple       tuple1 = *(const HeapTuple *) key1;
+       HeapTuple       tuple2 = *(const HeapTuple *) key2;
+       TupleHashTable hashtable = CurTupleHashTable;
+
+       if (execTuplesMatch(tuple1,
+                                               tuple2,
+                                               hashtable->tupdesc,
+                                               hashtable->numCols,
+                                               hashtable->keyColIdx,
+                                               hashtable->eqfunctions,
+                                               hashtable->tempcxt))
+               return 0;
+       else
+               return 1;
  }
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index d8fb9a9565da6919d8a9adf07ebbc3d5fe564da9..d9adb09dafbe05ae10adf618cb63a969c478c667 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -45,7 +45,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.115 2003/08/08 21:41:41 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.116 2003/08/19 01:13:40 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -905,7 +905,7 @@ agg_fill_hash_table(AggState *aggstate)
  
         aggstate->table_filled = true;
         /* Initialize to walk the hash table */
-       ResetTupleHashIterator(&aggstate->hashiter);
+       ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
  }
  
  /*
@@ -920,7 +920,6 @@ agg_retrieve_hash_table(AggState *aggstate)
         bool       *aggnulls;
         AggStatePerAgg peragg;
         AggStatePerGroup pergroup;
-       TupleHashTable hashtable;
         AggHashEntry entry;
         TupleTableSlot *firstSlot;
         TupleTableSlot *resultSlot;
@@ -935,7 +934,6 @@ agg_retrieve_hash_table(AggState *aggstate)
         aggnulls = econtext->ecxt_aggnulls;
         projInfo = aggstate->ss.ps.ps_ProjInfo;
         peragg = aggstate->peragg;
-       hashtable = aggstate->hashtable;
         firstSlot = aggstate->ss.ss_ScanTupleSlot;
  
         /*
@@ -950,8 +948,7 @@ agg_retrieve_hash_table(AggState *aggstate)
                 /*
                  * Find the next entry in the hash table
                  */
-               entry = (AggHashEntry) ScanTupleHashTable(hashtable,
-                                                                                                 &aggstate->hashiter);
+               entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
                 if (entry == NULL)
                 {
                         /* No more entries in hashtable, so done */
@@ -1440,7 +1437,7 @@ ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
                  */
                 if (((PlanState *) node)->lefttree->chgParam == NULL)
                 {
-                       ResetTupleHashIterator(&node->hashiter);
+                       ResetTupleHashIterator(node->hashtable, &node->hashiter);
                         return;
                 }
         }
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c

index 7530be263f32b0d286e8bfb5f9a43d368ae6550c..23b0cd3bf3d6c9499698cadfeea1b78dc33ab2c3 100644 (file)
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.54 2003/08/08 21:41:42 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.55 2003/08/19 01:13:40 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -627,8 +627,8 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
         TupleHashIterator hashiter;
         TupleHashEntry entry;
  
-       ResetTupleHashIterator(&hashiter);
-       while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
+       ResetTupleHashIterator(hashtable, &hashiter);
+       while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
         {
                 if (!execTuplesUnequal(entry->firstTuple,
                                                            tuple,
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c

index 7090d000587c7fb704106f023bddedc4b405db51..c6f9b0236975f153b3cdea846313e73161d6a1e2 100644 (file)
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.47 2003/08/04 02:40:06 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.48 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -44,7 +44,6 @@
  
  #include "postgres.h"
  
-
  #include "utils/dynahash.h"
  #include "utils/hsearch.h"
  #include "utils/memutils.h"
@@ -63,7 +62,6 @@
   * Private function prototypes
   */
  static void *DynaHashAlloc(Size size);
-static uint32 call_hash(HTAB *hashp, void *k);
  static HASHSEGMENT seg_alloc(HTAB *hashp);
  static bool element_alloc(HTAB *hashp);
  static bool dir_realloc(HTAB *hashp);
@@ -133,6 +131,19 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
         else
                 hashp->hash = string_hash;              /* default hash function */
  
+       /*
+        * If you don't specify a match function, it defaults to strncmp() if
+        * you used string_hash (either explicitly or by default) and to
+        * memcmp() otherwise.  (Prior to PostgreSQL 7.4, memcmp() was always
+        * used.)
+        */
+       if (flags & HASH_COMPARE)
+               hashp->match = info->match;
+       else if (hashp->hash == string_hash)
+               hashp->match = (HashCompareFunc) strncmp;
+       else
+               hashp->match = memcmp;
+
         if (flags & HASH_SHARED_MEM)
         {
                 /*
@@ -155,7 +166,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
                 hashp->hctl = NULL;
                 hashp->dir = NULL;
                 hashp->alloc = MEM_ALLOC;
-               hashp->hcxt = DynaHashCxt;
+               hashp->hcxt = CurrentDynaHashCxt;
                 hashp->isshared = false;
         }
  
@@ -207,26 +218,13 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
                 hashp->alloc = info->alloc;
         else
         {
-               if (flags & HASH_CONTEXT)
-               {
-                       /* hash table structures live in child of given context */
-                       CurrentDynaHashCxt = AllocSetContextCreate(info->hcxt,
-                                                                                                          "DynaHashTable",
-                                                                                               ALLOCSET_DEFAULT_MINSIZE,
-                                                                                          ALLOCSET_DEFAULT_INITSIZE,
-                                                                                          ALLOCSET_DEFAULT_MAXSIZE);
-                       hashp->hcxt = CurrentDynaHashCxt;
-               }
-               else
-               {
-                       /* hash table structures live in child of DynaHashCxt */
-                       CurrentDynaHashCxt = AllocSetContextCreate(DynaHashCxt,
-                                                                                                          "DynaHashTable",
-                                                                                               ALLOCSET_DEFAULT_MINSIZE,
-                                                                                          ALLOCSET_DEFAULT_INITSIZE,
-                                                                                          ALLOCSET_DEFAULT_MAXSIZE);
-                       hashp->hcxt = CurrentDynaHashCxt;
-               }
+               /* remaining hash table structures live in child of given context */
+               hashp->hcxt = AllocSetContextCreate(CurrentDynaHashCxt,
+                                                                                       "DynaHashTable",
+                                                                                       ALLOCSET_DEFAULT_MINSIZE,
+                                                                                       ALLOCSET_DEFAULT_INITSIZE,
+                                                                                       ALLOCSET_DEFAULT_MAXSIZE);
+               CurrentDynaHashCxt = hashp->hcxt;
         }
  
         if (!init_htab(hashp, nelem))
@@ -351,7 +349,7 @@ init_htab(HTAB *hashp, long nelem)
   * NB: assumes that all hash structure parameters have default values!
   */
  long
-hash_estimate_size(long num_entries, long entrysize)
+hash_estimate_size(long num_entries, Size entrysize)
  {
         long            size = 0;
         long            nBuckets,
@@ -447,7 +445,6 @@ void
  hash_stats(const char *where, HTAB *hashp)
  {
  #if HASH_STATISTICS
-
         fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
                         where, hashp->hctl->accesses, hashp->hctl->collisions);
  
@@ -459,19 +456,16 @@ hash_stats(const char *where, HTAB *hashp)
         fprintf(stderr, "hash_stats: total expansions %ld\n",
                         hash_expansions);
  #endif
-
  }
  
  /*******************************SEARCH ROUTINES *****************************/
  
-static uint32
-call_hash(HTAB *hashp, void *k)
-{
-       HASHHDR    *hctl = hashp->hctl;
-       uint32          hash_val,
-                               bucket;
  
-       hash_val = hashp->hash(k, (int) hctl->keysize);
+/* Convert a hash value to a bucket number */
+static inline uint32
+calc_bucket(HASHHDR *hctl, uint32 hash_val)
+{
+       uint32          bucket;
  
         bucket = hash_val & hctl->high_mask;
         if (bucket > hctl->max_bucket)
@@ -506,11 +500,12 @@ call_hash(HTAB *hashp, void *k)
   */
  void *
  hash_search(HTAB *hashp,
-                       void *keyPtr,
+                       const void *keyPtr,
                         HASHACTION action,
                         bool *foundPtr)
  {
         HASHHDR    *hctl = hashp->hctl;
+       uint32          hashvalue = 0;
         uint32          bucket;
         long            segment_num;
         long            segment_ndx;
@@ -545,7 +540,12 @@ hash_search(HTAB *hashp,
         }
         else
         {
-               bucket = call_hash(hashp, keyPtr);
+               HashCompareFunc match;
+               Size            keysize = hctl->keysize;
+
+               hashvalue = hashp->hash(keyPtr, keysize);
+               bucket = calc_bucket(hctl, hashvalue);
+
                 segment_num = bucket >> hctl->sshift;
                 segment_ndx = MOD(bucket, hctl->ssize);
  
@@ -560,9 +560,11 @@ hash_search(HTAB *hashp,
                 /*
                  * Follow collision chain looking for matching key
                  */
+               match = hashp->match;   /* save one fetch in inner loop */
                 while (currBucket != NULL)
                 {
-                       if (memcmp(ELEMENTKEY(currBucket), keyPtr, hctl->keysize) == 0)
+                       if (currBucket->hashvalue == hashvalue &&
+                               match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
                                 break;
                         prevBucketPtr = &(currBucket->link);
                         currBucket = *prevBucketPtr;
@@ -641,6 +643,7 @@ hash_search(HTAB *hashp,
                         currBucket->link = NULL;
  
                         /* copy key into record */
+                       currBucket->hashvalue = hashvalue;
                         memcpy(ELEMENTKEY(currBucket), keyPtr, hctl->keysize);
  
                         /* caller is expected to fill the data field on return */
@@ -802,7 +805,7 @@ expand_table(HTAB *hashp)
  
         /*
          * Relocate records to the new bucket.  NOTE: because of the way the
-        * hash masking is done in call_hash, only one old bucket can need to
+        * hash masking is done in calc_bucket, only one old bucket can need to
          * be split at this point.      With a different way of reducing the hash
          * value, that might not be true!
          */
@@ -820,8 +823,7 @@ expand_table(HTAB *hashp)
                  currElement = nextElement)
         {
                 nextElement = currElement->link;
-               if ((long) call_hash(hashp, (void *) ELEMENTKEY(currElement))
-                       == old_bucket)
+               if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
                 {
                         *oldlink = currElement;
                         oldlink = &currElement->link;
diff --git a/src/backend/utils/hash/hashfn.c b/src/backend/utils/hash/hashfn.c

index 835bd007a9746a0ea6ea689f42a1f4778f9a8fd1..3f7a0089075fa1243afe30500dc33940e9d1d211 100644 (file)
--- a/src/backend/utils/hash/hashfn.c
+++ b/src/backend/utils/hash/hashfn.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.18 2003/08/04 02:40:06 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/hash/hashfn.c,v 1.19 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -22,24 +22,21 @@
  /*
   * string_hash: hash function for keys that are null-terminated strings.
   *
- * NOTE: since dynahash.c backs this up with a fixed-length memcmp(),
- * the key must actually be zero-padded to the specified maximum length
- * to work correctly.  However, if it is known that nothing after the
- * first zero byte is interesting, this is the right hash function to use.
- *
   * NOTE: this is the default hash function if none is specified.
   */
  uint32
-string_hash(void *key, int keysize)
+string_hash(const void *key, Size keysize)
  {
-       return DatumGetUInt32(hash_any((unsigned char *) key, strlen((char *) key)));
+       return DatumGetUInt32(hash_any((const unsigned char *) key,
+                                                                  (int) strlen((const char *) key)));
  }
  
  /*
   * tag_hash: hash function for fixed-size tag values
   */
  uint32
-tag_hash(void *key, int keysize)
+tag_hash(const void *key, Size keysize)
  {
-       return DatumGetUInt32(hash_any((unsigned char *) key, keysize));
+       return DatumGetUInt32(hash_any((const unsigned char *) key,
+                                                                  (int) keysize));
  }
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h

index af2f123d2d6f329e30cf9c1895e2a53227dd9095..88449034feec56d90ff0c0294c8443fab3c9b6e8 100644 (file)
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: executor.h,v 1.99 2003/08/08 21:42:44 momjian Exp $
+ * $Id: executor.h,v 1.100 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -71,8 +71,6 @@ extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
  extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
                                          TupleTableSlot *slot,
                                          bool *isnew);
-extern TupleHashEntry ScanTupleHashTable(TupleHashTable hashtable,
-                                  TupleHashIterator *state);
  
  /*
   * prototypes from functions in execJunk.c
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index 3f163b8fdaa9aafa574638c4293c4fb3fe59f7fc..8d180009bfd97cb3f2ce34a123fee8de312f9934 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: execnodes.h,v 1.103 2003/08/08 21:42:47 momjian Exp $
+ * $Id: execnodes.h,v 1.104 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -21,6 +21,7 @@
  #include "nodes/bitmapset.h"
  #include "nodes/params.h"
  #include "nodes/plannodes.h"
+#include "utils/hsearch.h"
  #include "utils/tuplestore.h"
  
  
@@ -344,14 +345,14 @@ typedef struct TupleHashTableData *TupleHashTable;
  
  typedef struct TupleHashEntryData
  {
-       TupleHashEntry next;            /* next entry in same hash bucket */
-       uint32          hashkey;                /* exact hash key of this entry */
+       /* firstTuple must be the first field in this struct! */
         HeapTuple       firstTuple;             /* copy of first tuple in this group */
         /* there may be additional data beyond the end of this struct */
  } TupleHashEntryData;                  /* VARIABLE LENGTH STRUCT */
  
  typedef struct TupleHashTableData
  {
+       HTAB       *hashtab;            /* underlying dynahash table */
         int                     numCols;                /* number of columns in lookup key */
         AttrNumber *keyColIdx;          /* attr numbers of key columns */
         FmgrInfo   *eqfunctions;        /* lookup data for comparison functions */
@@ -359,19 +360,15 @@ typedef struct TupleHashTableData
         MemoryContext tablecxt;         /* memory context containing table */
         MemoryContext tempcxt;          /* context for function evaluations */
         Size            entrysize;              /* actual size to make each hash entry */
-       int                     nbuckets;               /* number of buckets in hash table */
-       TupleHashEntry buckets[1];      /* VARIABLE LENGTH ARRAY */
-} TupleHashTableData;                  /* VARIABLE LENGTH STRUCT */
+       TupleDesc       tupdesc;                /* tuple descriptor */
+} TupleHashTableData;
  
-typedef struct
-{
-       TupleHashEntry next_entry;      /* next entry in current chain */
-       int                     next_bucket;    /* next chain */
-} TupleHashIterator;
+typedef HASH_SEQ_STATUS TupleHashIterator;
  
-#define ResetTupleHashIterator(iter) \
-       ((iter)->next_entry = NULL, \
-        (iter)->next_bucket = 0)
+#define ResetTupleHashIterator(htable, iter) \
+       hash_seq_init(iter, (htable)->hashtab)
+#define ScanTupleHashTable(iter) \
+       ((TupleHashEntry) hash_seq_search(iter))
  
  
  /* ----------------------------------------------------------------
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h

index 905268badc6199e7cee00cf20bf93553daa4bf81..05d26e9a15092b95965fe1fe774a0bdd0812b8a8 100644 (file)
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: hsearch.h,v 1.28 2003/08/04 02:40:15 momjian Exp $
+ * $Id: hsearch.h,v 1.29 2003/08/19 01:13:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -15,6 +15,23 @@
  #define HSEARCH_H
  
  
+/*
+ * Hash and comparison functions must have these signatures.  Comparison
+ * functions return zero for match, nonzero for no match.  (The comparison
+ * function definition is designed to allow memcmp() and strncmp() to be
+ * used directly as key comparison functions.)
+ */
+typedef uint32 (*HashValueFunc) (const void *key, Size keysize);
+typedef int (*HashCompareFunc) (const void *key1, const void *key2,
+                                                               Size keysize);
+
+/*
+ * Space allocation function for a hashtable --- designed to match malloc().
+ * Note: there is no free function API; can't destroy a hashtable unless you
+ * use the default allocator.
+ */
+typedef void *(*HashAllocFunc) (Size request);
+
  /*
   * Constants
   *
@@ -44,6 +61,7 @@
  typedef struct HASHELEMENT
  {
         struct HASHELEMENT *link;       /* link to next entry in same bucket */
+       uint32  hashvalue;                      /* hash function result for this entry */
  } HASHELEMENT;
  
  /* A hash bucket is a linked list of HASHELEMENTs */
@@ -64,8 +82,8 @@ typedef struct HASHHDR
         long            ffactor;                /* Fill factor */
         long            nentries;               /* Number of entries in hash table */
         long            nsegs;                  /* Number of allocated segments */
-       long            keysize;                /* hash key length in bytes */
-       long            entrysize;              /* total user element size in bytes */
+       Size            keysize;                /* hash key length in bytes */
+       Size            entrysize;              /* total user element size in bytes */
         long            max_dsize;              /* 'dsize' limit if directory is fixed
                                                                  * size */
         HASHELEMENT *freeList;          /* linked list of free elements */
@@ -83,8 +101,9 @@ typedef struct HTAB
  {
         HASHHDR    *hctl;                       /* shared control information */
         HASHSEGMENT *dir;                       /* directory of segment starts */
-       uint32          (*hash) (void *key, int keysize);               /* Hash Function */
-       void       *(*alloc) (Size);    /* memory allocator */
+       HashValueFunc hash;                     /* hash function */
+       HashCompareFunc match;          /* key comparison function */
+       HashAllocFunc alloc;            /* memory allocator */
         MemoryContext hcxt;                     /* memory context if default allocator
                                                                  * used */
         char       *tabname;            /* table name (for error messages) */
@@ -97,28 +116,30 @@ typedef struct HASHCTL
  {
         long            ssize;                  /* Segment Size */
         long            dsize;                  /* (initial) Directory Size */
-       long            ffactor;                /* Fill factor */
-       uint32          (*hash) (void *key, int keysize);               /* Hash Function */
-       long            keysize;                /* hash key length in bytes */
-       long            entrysize;              /* total user element size in bytes */
         long            max_dsize;              /* limit to dsize if directory size is
                                                                  * limited */
-       void       *(*alloc) (Size);    /* memory allocation function */
+       long            ffactor;                /* Fill factor */
+       Size            keysize;                /* hash key length in bytes */
+       Size            entrysize;              /* total user element size in bytes */
+       HashValueFunc hash;                     /* hash function */
+       HashCompareFunc match;          /* key comparison function */
+       HashAllocFunc alloc;            /* memory allocator */
         HASHSEGMENT *dir;                       /* directory of segment starts */
         HASHHDR    *hctl;                       /* location of header in shared mem */
         MemoryContext hcxt;                     /* memory context to use for allocations */
  } HASHCTL;
  
  /* Flags to indicate which parameters are supplied */
-#define HASH_SEGMENT   0x002   /* Setting segment size */
-#define HASH_DIRSIZE   0x004   /* Setting directory size */
-#define HASH_FFACTOR   0x008   /* Setting fill factor */
+#define HASH_SEGMENT   0x002   /* Set segment size */
+#define HASH_DIRSIZE   0x004   /* Set directory size */
+#define HASH_FFACTOR   0x008   /* Set fill factor */
  #define HASH_FUNCTION  0x010   /* Set user defined hash function */
-#define HASH_ELEM              0x020   /* Setting key/entry size */
-#define HASH_SHARED_MEM 0x040  /* Setting shared mem const */
+#define HASH_ELEM              0x020   /* Set key/entry size */
+#define HASH_SHARED_MEM 0x040  /* Set shared mem const */
  #define HASH_ATTACH            0x080   /* Do not initialize hctl */
-#define HASH_ALLOC             0x100   /* Setting memory allocator */
-#define HASH_CONTEXT   0x200   /* Setting explicit memory context */
+#define HASH_ALLOC             0x100   /* Set memory allocator */
+#define HASH_CONTEXT   0x200   /* Set explicit memory context */
+#define HASH_COMPARE   0x400   /* Set user defined comparison function */
  
  
  /* max_dsize value to indicate expansible directory */
@@ -151,17 +172,17 @@ extern HTAB *hash_create(const char *tabname, long nelem,
                         HASHCTL *info, int flags);
  extern void hash_destroy(HTAB *hashp);
  extern void hash_stats(const char *where, HTAB *hashp);
-extern void *hash_search(HTAB *hashp, void *keyPtr, HASHACTION action,
+extern void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action,
                         bool *foundPtr);
  extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
  extern void *hash_seq_search(HASH_SEQ_STATUS *status);
-extern long hash_estimate_size(long num_entries, long entrysize);
+extern long hash_estimate_size(long num_entries, Size entrysize);
  extern long hash_select_dirsize(long num_entries);
  
  /*
   * prototypes for functions in hashfn.c
   */
-extern uint32 string_hash(void *key, int keysize);
-extern uint32 tag_hash(void *key, int keysize);
+extern uint32 string_hash(const void *key, Size keysize);
+extern uint32 tag_hash(const void *key, Size keysize);
  
  #endif   /* HSEARCH_H */
diff --git a/src/test/regress/expected/polymorphism.out b/src/test/regress/expected/polymorphism.out

index 04b52738e6e9236b14b30c975463f7958fef3d5f..bc6b9e4d85d9ba425df987a24a2507a6138d05ce 100644 (file)
--- a/src/test/regress/expected/polymorphism.out
+++ b/src/test/regress/expected/polymorphism.out
@@ -350,183 +350,183 @@ select f3, myaggp01a(*) from t group by f3;
   f3 | myaggp01a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp03a(*) from t group by f3;
   f3 | myaggp03a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp03b(*) from t group by f3;
   f3 | myaggp03b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp05a(f1) from t group by f3;
   f3 | myaggp05a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggp06a(f1) from t group by f3;
   f3 | myaggp06a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp08a(f1) from t group by f3;
   f3 | myaggp08a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp09a(f1) from t group by f3;
   f3 | myaggp09a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp09b(f1) from t group by f3;
   f3 | myaggp09b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggp10a(f1) from t group by f3;
   f3 | myaggp10a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggp10b(f1) from t group by f3;
   f3 | myaggp10b 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggp20a(f1) from t group by f3;
   f3 | myaggp20a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggp20b(f1) from t group by f3;
   f3 | myaggp20b 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggn01a(*) from t group by f3;
   f3 | myaggn01a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn01b(*) from t group by f3;
   f3 | myaggn01b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn03a(*) from t group by f3;
   f3 | myaggn03a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn05a(f1) from t group by f3;
   f3 | myaggn05a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggn05b(f1) from t group by f3;
   f3 | myaggn05b 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
  
  select f3, myaggn06a(f1) from t group by f3;
   f3 | myaggn06a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn06b(f1) from t group by f3;
   f3 | myaggn06b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn08a(f1) from t group by f3;
   f3 | myaggn08a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn08b(f1) from t group by f3;
   f3 | myaggn08b 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn09a(f1) from t group by f3;
   f3 | myaggn09a 
  ----+-----------
   b  | {}
- a  | {}
   c  | {}
+ a  | {}
  (3 rows)
  
  select f3, myaggn10a(f1) from t group by f3;
   f3 | myaggn10a 
  ----+-----------
   b  | {1,2,3}
- a  | {1,2,3}
   c  | {1,2}
+ a  | {1,2,3}
  (3 rows)
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 19 Aug 2003 01:13:41 +0000 (01:13 +0000)
src/backend/executor/execGrouping.c		patch | blob | blame | history
src/backend/executor/nodeAgg.c		patch | blob | blame | history
src/backend/executor/nodeSubplan.c		patch | blob | blame | history
src/backend/utils/hash/dynahash.c		patch | blob | blame | history
src/backend/utils/hash/hashfn.c		patch | blob | blame | history
src/include/executor/executor.h		patch | blob | blame | history
src/include/nodes/execnodes.h		patch | blob | blame | history
src/include/utils/hsearch.h		patch | blob | blame | history
src/test/regress/expected/polymorphism.out		patch | blob | blame | history