From 4a05e7de309bc6fb2fbd42deba02e97feb1c4306 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 1 Aug 2012 16:13:36 +0000 Subject: [PATCH] Get rid of chash_bucket hack in favor of using formal hazard pointers. --- contrib/hashtest/hashtest.c | 1 - src/backend/utils/hash/chash.c | 61 +++++++++++++++------------------- src/include/storage/proc.h | 15 +++++++-- src/include/utils/chash.h | 1 - 4 files changed, 40 insertions(+), 38 deletions(-) diff --git a/contrib/hashtest/hashtest.c b/contrib/hashtest/hashtest.c index bd716c7f2c..25c6c3517c 100644 --- a/contrib/hashtest/hashtest.c +++ b/contrib/hashtest/hashtest.c @@ -42,7 +42,6 @@ typedef struct static CHashDescriptor cdesc = { "hashtest-chash", /* name */ - 0xd3c8eae3, /* id */ 1048576, /* capacity */ sizeof(hentry), /* element size */ sizeof(uint32) /* key size */ diff --git a/src/backend/utils/hash/chash.c b/src/backend/utils/hash/chash.c index 2aa47a3720..83e514689e 100644 --- a/src/backend/utils/hash/chash.c +++ b/src/backend/utils/hash/chash.c @@ -183,25 +183,6 @@ typedef struct bool found; } CHashScanResult; -/* - * We need a memory barrier to make sure that our bucket advertisement is - * committed to memory before we begin scanning the bucket, and another one - * to make sure the scan is done before we cease advertising it. - */ -#define CHashTableSuppressGC(table, bno) \ - do { \ - Assert(MyProc->chash_bucket == 0); \ - MyProc->chash_bucket = \ - ((uint64) table->desc.id)<<32 | (bucket>>table->garbage_shift); \ - pg_memory_barrier(); \ - } while (0) -#define CHashTableUnsuppressGC() \ - do { \ - Assert(MyProc->chash_bucket != 0); \ - pg_memory_barrier(); \ - MyProc->chash_bucket = 0; \ - } while (0) - /* Function prototypes. */ static CHashPtr CHashAllocate(CHashTable table); static void CHashAddToGarbage(CHashTable table, uint32 bucket, CHashPtr c); @@ -230,8 +211,6 @@ CHashBootstrap(CHashDescriptor *desc) memcpy(&table->desc, desc, sizeof(CHashDescriptor)); /* Sanity checks. */ - if (desc->id == 0) - elog(ERROR, "concurrent hash table id must not be zero"); if (desc->capacity < 1 || desc->capacity > CHashMaxCapacity) elog(ERROR, "invalid capacity for concurrent hash"); if (desc->key_size < 1 || desc->key_size > desc->element_size) @@ -390,20 +369,25 @@ CHashSearch(CHashTable table, void *entry) { uint32 hashcode = hash_any(entry, table->desc.key_size); uint32 bucket = hashcode & table->bucket_mask; + CHashPtr *b = &table->bucket[bucket]; CHashScanResult scan; /* Prevent garbage collection for this bucket. */ - CHashTableSuppressGC(table, bucket); + Assert(MyProc->hazard[0] == NULL); + MyProc->hazard[0] = b; + pg_memory_barrier(); /* Scan bucket and return data from any matching entry. */ - CHashBucketScan(table, &table->bucket[bucket], hashcode, entry, &scan); + CHashBucketScan(table, b, hashcode, entry, &scan); if (scan.found) memcpy(((char *) entry) + table->desc.key_size, CHashNodeGetItem(scan.target_node) + table->desc.key_size, table->desc.element_size - table->desc.key_size); /* Allow garbage collection for this bucket. */ - CHashTableUnsuppressGC(); + Assert(MyProc->hazard[0] != NULL); + pg_memory_barrier(); + MyProc->hazard[0] = NULL; return scan.found; } @@ -426,6 +410,7 @@ CHashInsert(CHashTable table, void *entry) { uint32 hashcode = hash_any(entry, table->desc.key_size); uint32 bucket = hashcode & table->bucket_mask; + CHashPtr *b = &table->bucket[bucket]; CHashPtr new; CHashNode *nnew; CHashScanResult scan; @@ -441,7 +426,9 @@ CHashInsert(CHashTable table, void *entry) memcpy(CHashNodeGetItem(nnew), entry, table->desc.element_size); /* Prevent garbage collection for this bucket. */ - CHashTableSuppressGC(table, bucket); + Assert(MyProc->hazard[0] == NULL); + MyProc->hazard[0] = b; + pg_memory_barrier(); /* * Scan the bucket. If we don't find a match, use compare-and-swap to @@ -449,7 +436,7 @@ CHashInsert(CHashTable table, void *entry) * return the data to the caller. */ retry: - CHashBucketScan(table, &table->bucket[bucket], hashcode, entry, &scan); + CHashBucketScan(table, b, hashcode, entry, &scan); if (scan.found) memcpy(((char *) entry) + table->desc.key_size, CHashNodeGetItem(scan.target_node) + table->desc.key_size, @@ -482,7 +469,9 @@ retry: } /* Allow garbage collection for this bucket. */ - CHashTableUnsuppressGC(); + Assert(MyProc->hazard[0] != NULL); + pg_memory_barrier(); + MyProc->hazard[0] = NULL; /* If the insert failed, free the entry we allocated. */ if (scan.found) @@ -501,14 +490,17 @@ CHashDelete(CHashTable table, void *entry) { uint32 hashcode = hash_any(entry, table->desc.key_size); uint32 bucket = hashcode & table->bucket_mask; + CHashPtr *b = &table->bucket[bucket]; bool cleanup_scan = false; CHashScanResult scan; /* Prevent garbage collection for this bucket. */ - CHashTableSuppressGC(table, bucket); + Assert(MyProc->hazard[0] == NULL); + MyProc->hazard[0] = b; + pg_memory_barrier(); /* Scan bucket. */ - CHashBucketScan(table, &table->bucket[bucket], hashcode, entry, &scan); + CHashBucketScan(table, b, hashcode, entry, &scan); /* * If we found a match, then loop until we succesfully delete-mark the @@ -550,11 +542,13 @@ CHashDelete(CHashTable table, void *entry) if (cleanup_scan) { table->stats.s_cleanup_scan++; - CHashBucketCleanup(table, &table->bucket[bucket], hashcode); + CHashBucketCleanup(table, b, hashcode); } /* Allow garbage collection for this bucket. */ - CHashTableUnsuppressGC(); + Assert(MyProc->hazard[0] != NULL); + pg_memory_barrier(); + MyProc->hazard[0] = NULL; /* We're done. */ return scan.found; @@ -879,7 +873,6 @@ CHashAllocate(CHashTable table) table->stats.s_gc_pop_fail++; else { - uint64 chash_bucket; uint32 i; CHashPtr fhead; CHashNode *n; @@ -896,18 +889,18 @@ CHashAllocate(CHashTable table) * might want to eventually enter a longer sleep here, or PANIC, * but it's not clear exactly how to calibrate that. */ - chash_bucket = ((uint64) table->desc.id)<<32 | table->gc_next; for (i = 0; i < ProcGlobal->allProcCount; i++) { PGPROC *proc = &ProcGlobal->allProcs[i]; - while (proc->chash_bucket == chash_bucket) + while (proc->hazard[0] == b) ; } /* Remove one item from list to satisfy current allocation. */ new = garbage; n = CHashTableGetNode(table, new); + pg_read_barrier_depends(); fhead = n->un.gcnext; /* Put any remaining elements back on the free list. */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 1e91eedc2b..cc401bab53 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -54,6 +54,14 @@ struct XidCache */ #define FP_LOCK_SLOTS_PER_BACKEND 16 +/* + * Some lock-free algorithms require each backend process to be able to + * advertise a certain number of "hazard pointers" in shared memory. + * Right now one per backend is enough for our purpose, but some + * algorithms require more. + */ +#define NUM_HAZARD_POINTERS 1 + /* * Each backend has a PGPROC struct in shared memory. There is also a list of * currently-unused PGPROC structs that will be reallocated to new backends. @@ -140,8 +148,11 @@ struct PGPROC LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID * lock */ - /* chash bucket currently being scanned. */ - uint64 chash_bucket; + /* + * Hazard pointers. Currently one is enough, though some algorithms + * require a few more. + */ + void *hazard[NUM_HAZARD_POINTERS]; }; /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */ diff --git a/src/include/utils/chash.h b/src/include/utils/chash.h index b356915115..703aa3d7de 100644 --- a/src/include/utils/chash.h +++ b/src/include/utils/chash.h @@ -17,7 +17,6 @@ typedef struct { const char *shmem_name; /* shared memory name for this hash table */ - uint32 id; /* unique identifier for this hash table */ uint32 capacity; /* maximum size of hash table */ uint16 element_size; /* size of each element */ uint16 key_size; /* size of each key */ -- 2.39.5