diff options
-rw-r--r-- | src/include/lib/simplehash.h | 55 |
1 files changed, 50 insertions, 5 deletions
diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h index 74f768249a7..6c6c3ee0d09 100644 --- a/src/include/lib/simplehash.h +++ b/src/include/lib/simplehash.h @@ -157,13 +157,24 @@ SH_SCOPE void SH_STAT(SH_TYPE *tb); #include "utils/memutils.h" -/* conservative fillfactor for a robin hood table, might want to adjust */ -#define SH_FILLFACTOR (0.8) -/* increase fillfactor if we otherwise would error out */ -#define SH_MAX_FILLFACTOR (0.95) /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */ #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1) +/* normal fillfactor, unless already close to maximum */ +#ifndef SH_FILLFACTOR +#define SH_FILLFACTOR (0.9) +#endif +/* increase fillfactor if we otherwise would error out */ +#define SH_MAX_FILLFACTOR (0.98) +/* grow if actual and optimal location bigger than */ +#ifndef SH_GROW_MAX_DIB +#define SH_GROW_MAX_DIB 25 +#endif +/* grow if more than elements to move when inserting */ +#ifndef SH_GROW_MAX_MOVE +#define SH_GROW_MAX_MOVE 150 +#endif + #ifdef SH_STORE_HASH #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey)) #else @@ -466,12 +477,18 @@ SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found) uint32 startelem; uint32 curelem; SH_ELEMENT_TYPE *data; - uint32 insertdist = 0; + uint32 insertdist; + +restart: + insertdist = 0; /* * We do the grow check even if the key is actually present, to avoid * doing the check inside the loop. This also lets us avoid having to * re-find our position in the hashtable after resizing. + * + * Note that this also reached when resizing the table due to + * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE. */ if (unlikely(tb->members >= tb->grow_threshold)) { @@ -536,6 +553,7 @@ SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found) SH_ELEMENT_TYPE *lastentry = entry; uint32 emptyelem = curelem; uint32 moveelem; + int32 emptydist = 0; /* find next empty bucket */ while (true) @@ -550,6 +568,19 @@ SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found) lastentry = emptyentry; break; } + + /* + * To avoid negative consequences from overly imbalanced + * hashtables, grow the hashtable if collisions would require + * us to move a lot of entries. The most likely cause of such + * imbalance is filling a (currently) small table, from a + * currently big one, in hash-table order. + */ + if (++emptydist > SH_GROW_MAX_MOVE) + { + tb->grow_threshold = 0; + goto restart; + } } /* shift forward, starting at last occupied element */ @@ -585,6 +616,18 @@ SH_INSERT(SH_TYPE *tb, SH_KEY_TYPE key, bool *found) curelem = SH_NEXT(tb, curelem, startelem); insertdist++; + + /* + * To avoid negative consequences from overly imbalanced hashtables, + * grow the hashtable if collisions lead to large runs. The most + * likely cause of such imbalance is filling a (currently) small + * table, from a currently big one, in hash-table order. + */ + if (insertdist > SH_GROW_MAX_DIB) + { + tb->grow_threshold = 0; + goto restart; + } } } @@ -878,6 +921,8 @@ SH_STAT(SH_TYPE *tb) #undef SH_MAKE_NAME_ #undef SH_FILLFACTOR #undef SH_MAX_FILLFACTOR +#undef SH_GROW_MAX_DIB +#undef SH_GROW_MAX_MOVE #undef SH_MAX_SIZE /* types */ |