Get rid of chash_bucket hack in favor of using formal hazard pointers.
authorRobert Haas <rhaas@postgresql.org>
Wed, 1 Aug 2012 16:13:36 +0000 (16:13 +0000)
committerRobert Haas <rhaas@postgresql.org>
Wed, 1 Aug 2012 16:13:36 +0000 (16:13 +0000)
contrib/hashtest/hashtest.c
src/backend/utils/hash/chash.c
src/include/storage/proc.h
src/include/utils/chash.h

index bd716c7f2ca9c45e928af739dcf396bdb2315c19..25c6c3517c7aebbefc508d15d9a17a393e5b6f04 100644 (file)
@@ -42,7 +42,6 @@ typedef struct
 
 static CHashDescriptor cdesc = {
        "hashtest-chash",       /* name */
-       0xd3c8eae3,                     /* id */
        1048576,                        /* capacity */
        sizeof(hentry),         /* element size */
        sizeof(uint32)          /* key size */
index 2aa47a3720b007f25769b1292afc07dc72850ce0..83e514689ed2865eef02cccfdfdc3c74c0f063df 100644 (file)
@@ -183,25 +183,6 @@ typedef struct
        bool            found;
 } CHashScanResult;
 
-/*
- * We need a memory barrier to make sure that our bucket advertisement is
- * committed to memory before we begin scanning the bucket, and another one
- * to make sure the scan is done before we cease advertising it.
- */
-#define CHashTableSuppressGC(table, bno) \
-       do { \
-               Assert(MyProc->chash_bucket == 0); \
-               MyProc->chash_bucket = \
-                       ((uint64) table->desc.id)<<32 | (bucket>>table->garbage_shift); \
-               pg_memory_barrier(); \
-       } while (0)
-#define CHashTableUnsuppressGC() \
-       do { \
-               Assert(MyProc->chash_bucket != 0); \
-               pg_memory_barrier(); \
-               MyProc->chash_bucket = 0; \
-       } while (0)
-
 /* Function prototypes. */
 static CHashPtr CHashAllocate(CHashTable table);
 static void CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c);
@@ -230,8 +211,6 @@ CHashBootstrap(CHashDescriptor *desc)
        memcpy(&table->desc, desc, sizeof(CHashDescriptor)); 
 
        /* Sanity checks. */
-       if (desc->id == 0)
-               elog(ERROR, "concurrent hash table id must not be zero");
        if (desc->capacity < 1 || desc->capacity > CHashMaxCapacity)
                elog(ERROR, "invalid capacity for concurrent hash");
        if (desc->key_size < 1 || desc->key_size > desc->element_size)
@@ -390,20 +369,25 @@ CHashSearch(CHashTable table, void *entry)
 {
        uint32  hashcode = hash_any(entry, table->desc.key_size);
        uint32  bucket = hashcode & table->bucket_mask;
+       CHashPtr           *b = &table->bucket[bucket];
        CHashScanResult scan;
 
        /* Prevent garbage collection for this bucket. */
-       CHashTableSuppressGC(table, bucket);
+       Assert(MyProc->hazard[0] == NULL);
+       MyProc->hazard[0] = b;
+       pg_memory_barrier();
 
        /* Scan bucket and return data from any matching entry. */
-       CHashBucketScan(table, &table->bucket[bucket], hashcode, entry, &scan);
+       CHashBucketScan(table, b, hashcode, entry, &scan);
        if (scan.found)
                memcpy(((char *) entry) + table->desc.key_size,
                           CHashNodeGetItem(scan.target_node) + table->desc.key_size,
                           table->desc.element_size - table->desc.key_size);
 
        /* Allow garbage collection for this bucket. */
-       CHashTableUnsuppressGC();
+       Assert(MyProc->hazard[0] != NULL);
+       pg_memory_barrier();
+       MyProc->hazard[0] = NULL;
 
        return scan.found;
 }
@@ -426,6 +410,7 @@ CHashInsert(CHashTable table, void *entry)
 {
        uint32  hashcode = hash_any(entry, table->desc.key_size);
        uint32  bucket = hashcode & table->bucket_mask;
+       CHashPtr           *b = &table->bucket[bucket];
        CHashPtr        new;
        CHashNode  *nnew;
        CHashScanResult scan;
@@ -441,7 +426,9 @@ CHashInsert(CHashTable table, void *entry)
        memcpy(CHashNodeGetItem(nnew), entry, table->desc.element_size);
 
        /* Prevent garbage collection for this bucket. */
-       CHashTableSuppressGC(table, bucket);
+       Assert(MyProc->hazard[0] == NULL);
+       MyProc->hazard[0] = b;
+       pg_memory_barrier();
 
        /*
         * Scan the bucket.  If we don't find a match, use compare-and-swap to
@@ -449,7 +436,7 @@ CHashInsert(CHashTable table, void *entry)
         * return the data to the caller.
         */
 retry:
-       CHashBucketScan(table, &table->bucket[bucket], hashcode, entry, &scan);
+       CHashBucketScan(table, b, hashcode, entry, &scan);
        if (scan.found)
                memcpy(((char *) entry) + table->desc.key_size,
                           CHashNodeGetItem(scan.target_node) + table->desc.key_size,
@@ -482,7 +469,9 @@ retry:
        }
 
        /* Allow garbage collection for this bucket. */
-       CHashTableUnsuppressGC();
+       Assert(MyProc->hazard[0] != NULL);
+       pg_memory_barrier();
+       MyProc->hazard[0] = NULL;
 
        /* If the insert failed, free the entry we allocated. */
        if (scan.found)
@@ -501,14 +490,17 @@ CHashDelete(CHashTable table, void *entry)
 {
        uint32  hashcode = hash_any(entry, table->desc.key_size);
        uint32  bucket = hashcode & table->bucket_mask;
+       CHashPtr           *b = &table->bucket[bucket];
        bool    cleanup_scan = false;
        CHashScanResult scan;
 
        /* Prevent garbage collection for this bucket. */
-       CHashTableSuppressGC(table, bucket);
+       Assert(MyProc->hazard[0] == NULL);
+       MyProc->hazard[0] = b;
+       pg_memory_barrier();
 
        /* Scan bucket. */
-       CHashBucketScan(table, &table->bucket[bucket], hashcode, entry, &scan);
+       CHashBucketScan(table, b, hashcode, entry, &scan);
 
        /*
         * If we found a match, then loop until we succesfully delete-mark the
@@ -550,11 +542,13 @@ CHashDelete(CHashTable table, void *entry)
        if (cleanup_scan)
        {
                table->stats.s_cleanup_scan++;
-               CHashBucketCleanup(table, &table->bucket[bucket], hashcode);
+               CHashBucketCleanup(table, b, hashcode);
        }
 
        /* Allow garbage collection for this bucket. */
-       CHashTableUnsuppressGC();
+       Assert(MyProc->hazard[0] != NULL);
+       pg_memory_barrier();
+       MyProc->hazard[0] = NULL;
 
        /* We're done. */
        return scan.found;
@@ -879,7 +873,6 @@ CHashAllocate(CHashTable table)
                        table->stats.s_gc_pop_fail++;
                else
                {
-                       uint64          chash_bucket;
                        uint32          i;
                        CHashPtr        fhead;
                        CHashNode *n;
@@ -896,18 +889,18 @@ CHashAllocate(CHashTable table)
                         * might want to eventually enter a longer sleep here, or PANIC,
                         * but it's not clear exactly how to calibrate that.
                         */
-                       chash_bucket = ((uint64) table->desc.id)<<32 | table->gc_next;
                        for (i = 0; i < ProcGlobal->allProcCount; i++)
                        {
                                PGPROC     *proc = &ProcGlobal->allProcs[i];
 
-                               while (proc->chash_bucket == chash_bucket)
+                               while (proc->hazard[0] == b)
                                        ;
                        }
 
                        /* Remove one item from list to satisfy current allocation. */
                        new = garbage;
                        n = CHashTableGetNode(table, new);
+                       pg_read_barrier_depends();
                        fhead = n->un.gcnext;
 
                        /* Put any remaining elements back on the free list. */
index 1e91eedc2b88c26548cc76d724e6d9b8e8f88601..cc401bab531bf936e241381cc24bd28bbeba175a 100644 (file)
@@ -54,6 +54,14 @@ struct XidCache
  */
 #define                FP_LOCK_SLOTS_PER_BACKEND 16
 
+/*
+ * Some lock-free algorithms require each backend process to be able to
+ * advertise a certain number of "hazard pointers" in shared memory.
+ * Right now one per backend is enough for our purpose, but some
+ * algorithms require more.
+ */
+#define                NUM_HAZARD_POINTERS                     1
+
 /*
  * Each backend has a PGPROC struct in shared memory.  There is also a list of
  * currently-unused PGPROC structs that will be reallocated to new backends.
@@ -140,8 +148,11 @@ struct PGPROC
        LocalTransactionId fpLocalTransactionId;        /* lxid for fast-path VXID
                                                                                                 * lock */
 
-       /* chash bucket currently being scanned. */
-       uint64          chash_bucket;
+       /*
+        * Hazard pointers. Currently one is enough, though some algorithms
+        * require a few more.
+        */
+       void       *hazard[NUM_HAZARD_POINTERS];
 };
 
 /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
index b356915115f6946e47e0f888a2683a58ef8c9689..703aa3d7de5572a713f8600dc817d994566ab22d 100644 (file)
@@ -17,7 +17,6 @@
 typedef struct
 {
        const char *shmem_name;         /* shared memory name for this hash table */
-       uint32          id;                             /* unique identifier for this hash table */
        uint32          capacity;               /* maximum size of hash table */
        uint16          element_size;   /* size of each element */
        uint16          key_size;               /* size of each key */