Use a hash table for catcache.c's CatCList objects.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Mar 2024 21:13:53 +0000 (17:13 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 22 Mar 2024 21:13:53 +0000 (17:13 -0400)
Up to now, all of the "catcache list" objects within a catalog cache
were just chained together on a single dlist, requiring O(N) time to
search.  Remarkably, we've not had serious performance problems with
that so far; but we got a complaint of a bad performance regression
from v15 in a case with a large number of roles in the system, which
traced down to O(N^2) total time when we probed N catcache lists.

Replace that data structure with a hashtable having an enlargeable
number of dlists, in an exactly parallel way to the data structure
we've used for years for the plain CatCTup cache members.  The extra
cost of maintaining a hash table seems negligible, since we were
already computing a hash value for list searches.

Normally this'd be HEAD-only material, but in view of the performance
regression it seems advisable to back-patch into v16.  In the v16
version of the patch, leave the dead cc_lists field where it is and
add the new fields at the end of struct catcache, to avoid possible
ABI breakage in case any external code is looking at these structs.
(We assume no external code is actually allocating new catcache
structs.)

Per report from alex work.

Discussion: https://postgr.es/m/CAGvXd3OSMbJQwOSc-Tq-Ro1CAz=vggErdSG7pv2s6vmmTOLJSg@mail.gmail.com

src/backend/utils/cache/catcache.c
src/include/utils/catcache.h

index d5a3c1b5914954e0141230a5fb56413e7c9220bd..569f51cb33439a252339d3b90546d2ab89a5c965 100644 (file)
@@ -88,6 +88,8 @@ static void CatCachePrintStats(int code, Datum arg);
 #endif
 static void CatCacheRemoveCTup(CatCache *cache, CatCTup *ct);
 static void CatCacheRemoveCList(CatCache *cache, CatCList *cl);
+static void RehashCatCache(CatCache *cp);
+static void RehashCatCacheLists(CatCache *cp);
 static void CatalogCacheInitializeCache(CatCache *cache);
 static CatCTup *CatalogCacheCreateEntry(CatCache *cache,
                                                                                HeapTuple ntp, SysScanDesc scandesc,
@@ -444,6 +446,7 @@ CatCachePrintStats(int code, Datum arg)
        long            cc_neg_hits = 0;
        long            cc_newloads = 0;
        long            cc_invals = 0;
+       long            cc_nlists = 0;
        long            cc_lsearches = 0;
        long            cc_lhits = 0;
 
@@ -453,7 +456,7 @@ CatCachePrintStats(int code, Datum arg)
 
                if (cache->cc_ntup == 0 && cache->cc_searches == 0)
                        continue;                       /* don't print unused caches */
-               elog(DEBUG2, "catcache %s/%u: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits",
+               elog(DEBUG2, "catcache %s/%u: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %d lists, %ld lsrch, %ld lhits",
                         cache->cc_relname,
                         cache->cc_indexoid,
                         cache->cc_ntup,
@@ -465,6 +468,7 @@ CatCachePrintStats(int code, Datum arg)
                         cache->cc_searches - cache->cc_hits - cache->cc_neg_hits - cache->cc_newloads,
                         cache->cc_searches - cache->cc_hits - cache->cc_neg_hits,
                         cache->cc_invals,
+                        cache->cc_nlist,
                         cache->cc_lsearches,
                         cache->cc_lhits);
                cc_searches += cache->cc_searches;
@@ -472,10 +476,11 @@ CatCachePrintStats(int code, Datum arg)
                cc_neg_hits += cache->cc_neg_hits;
                cc_newloads += cache->cc_newloads;
                cc_invals += cache->cc_invals;
+               cc_nlists += cache->cc_nlist;
                cc_lsearches += cache->cc_lsearches;
                cc_lhits += cache->cc_lhits;
        }
-       elog(DEBUG2, "catcache totals: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits",
+       elog(DEBUG2, "catcache totals: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lists, %ld lsrch, %ld lhits",
                 CacheHdr->ch_ntup,
                 cc_searches,
                 cc_hits,
@@ -485,6 +490,7 @@ CatCachePrintStats(int code, Datum arg)
                 cc_searches - cc_hits - cc_neg_hits - cc_newloads,
                 cc_searches - cc_hits - cc_neg_hits,
                 cc_invals,
+                cc_nlists,
                 cc_lsearches,
                 cc_lhits);
 }
@@ -573,6 +579,8 @@ CatCacheRemoveCList(CatCache *cache, CatCList *cl)
                                         cache->cc_keyno, cl->keys);
 
        pfree(cl);
+
+       --cache->cc_nlist;
 }
 
 
@@ -611,14 +619,19 @@ CatCacheInvalidate(CatCache *cache, uint32 hashValue)
         * Invalidate *all* CatCLists in this cache; it's too hard to tell which
         * searches might still be correct, so just zap 'em all.
         */
-       dlist_foreach_modify(iter, &cache->cc_lists)
+       for (int i = 0; i < cache->cc_nlbuckets; i++)
        {
-               CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+               dlist_head *bucket = &cache->cc_lbucket[i];
 
-               if (cl->refcount > 0)
-                       cl->dead = true;
-               else
-                       CatCacheRemoveCList(cache, cl);
+               dlist_foreach_modify(iter, bucket)
+               {
+                       CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+
+                       if (cl->refcount > 0)
+                               cl->dead = true;
+                       else
+                               CatCacheRemoveCList(cache, cl);
+               }
        }
 
        /*
@@ -691,14 +704,19 @@ ResetCatalogCache(CatCache *cache)
        int                     i;
 
        /* Remove each list in this cache, or at least mark it dead */
-       dlist_foreach_modify(iter, &cache->cc_lists)
+       for (i = 0; i < cache->cc_nlbuckets; i++)
        {
-               CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+               dlist_head *bucket = &cache->cc_lbucket[i];
 
-               if (cl->refcount > 0)
-                       cl->dead = true;
-               else
-                       CatCacheRemoveCList(cache, cl);
+               dlist_foreach_modify(iter, bucket)
+               {
+                       CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+
+                       if (cl->refcount > 0)
+                               cl->dead = true;
+                       else
+                               CatCacheRemoveCList(cache, cl);
+               }
        }
 
        /* Remove each tuple in this cache, or at least mark it dead */
@@ -862,6 +880,12 @@ InitCatCache(int id,
                                                                         MCXT_ALLOC_ZERO);
        cp->cc_bucket = palloc0(nbuckets * sizeof(dlist_head));
 
+       /*
+        * Many catcaches never receive any list searches.  Therefore, we don't
+        * allocate the cc_lbuckets till we get a list search.
+        */
+       cp->cc_lbucket = NULL;
+
        /*
         * initialize the cache's relation information for the relation
         * corresponding to this cache, and initialize some of the new cache's
@@ -874,7 +898,9 @@ InitCatCache(int id,
        cp->cc_relisshared = false; /* temporary */
        cp->cc_tupdesc = (TupleDesc) NULL;
        cp->cc_ntup = 0;
+       cp->cc_nlist = 0;
        cp->cc_nbuckets = nbuckets;
+       cp->cc_nlbuckets = 0;
        cp->cc_nkeys = nkeys;
        for (i = 0; i < nkeys; ++i)
        {
@@ -939,6 +965,44 @@ RehashCatCache(CatCache *cp)
        cp->cc_bucket = newbucket;
 }
 
+/*
+ * Enlarge a catcache's list storage, doubling the number of buckets.
+ */
+static void
+RehashCatCacheLists(CatCache *cp)
+{
+       dlist_head *newbucket;
+       int                     newnbuckets;
+       int                     i;
+
+       elog(DEBUG1, "rehashing catalog cache id %d for %s; %d lists, %d buckets",
+                cp->id, cp->cc_relname, cp->cc_nlist, cp->cc_nlbuckets);
+
+       /* Allocate a new, larger, hash table. */
+       newnbuckets = cp->cc_nlbuckets * 2;
+       newbucket = (dlist_head *) MemoryContextAllocZero(CacheMemoryContext, newnbuckets * sizeof(dlist_head));
+
+       /* Move all entries from old hash table to new. */
+       for (i = 0; i < cp->cc_nlbuckets; i++)
+       {
+               dlist_mutable_iter iter;
+
+               dlist_foreach_modify(iter, &cp->cc_lbucket[i])
+               {
+                       CatCList   *cl = dlist_container(CatCList, cache_elem, iter.cur);
+                       int                     hashIndex = HASH_INDEX(cl->hash_value, newnbuckets);
+
+                       dlist_delete(iter.cur);
+                       dlist_push_head(&newbucket[hashIndex], &cl->cache_elem);
+               }
+       }
+
+       /* Switch to the new array. */
+       pfree(cp->cc_lbucket);
+       cp->cc_nlbuckets = newnbuckets;
+       cp->cc_lbucket = newbucket;
+}
+
 /*
  *             CatalogCacheInitializeCache
  *
@@ -1588,7 +1652,9 @@ SearchCatCacheList(CatCache *cache,
        Datum           v4 = 0;                 /* dummy last-column value */
        Datum           arguments[CATCACHE_MAXKEYS];
        uint32          lHashValue;
+       Index           lHashIndex;
        dlist_iter      iter;
+       dlist_head *lbucket;
        CatCList   *cl;
        CatCTup    *ct;
        List       *volatile ctlist;
@@ -1602,7 +1668,7 @@ SearchCatCacheList(CatCache *cache,
        /*
         * one-time startup overhead for each cache
         */
-       if (cache->cc_tupdesc == NULL)
+       if (unlikely(cache->cc_tupdesc == NULL))
                CatalogCacheInitializeCache(cache);
 
        Assert(nkeys > 0 && nkeys < cache->cc_nkeys);
@@ -1618,11 +1684,36 @@ SearchCatCacheList(CatCache *cache,
        arguments[3] = v4;
 
        /*
-        * compute a hash value of the given keys for faster search.  We don't
-        * presently divide the CatCList items into buckets, but this still lets
-        * us skip non-matching items quickly most of the time.
+        * If we haven't previously done a list search in this cache, create the
+        * bucket header array; otherwise, consider whether it's time to enlarge
+        * it.
+        */
+       if (cache->cc_lbucket == NULL)
+       {
+               /* Arbitrary initial size --- must be a power of 2 */
+               int                     nbuckets = 16;
+
+               cache->cc_lbucket = (dlist_head *)
+                       MemoryContextAllocZero(CacheMemoryContext,
+                                                                  nbuckets * sizeof(dlist_head));
+               /* Don't set cc_nlbuckets if we get OOM allocating cc_lbucket */
+               cache->cc_nlbuckets = nbuckets;
+       }
+       else
+       {
+               /*
+                * If the hash table has become too full, enlarge the buckets array.
+                * Quite arbitrarily, we enlarge when fill factor > 2.
+                */
+               if (cache->cc_nlist > cache->cc_nlbuckets * 2)
+                       RehashCatCacheLists(cache);
+       }
+
+       /*
+        * Find the hash bucket in which to look for the CatCList.
         */
        lHashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4);
+       lHashIndex = HASH_INDEX(lHashValue, cache->cc_nlbuckets);
 
        /*
         * scan the items until we find a match or exhaust our list
@@ -1630,7 +1721,8 @@ SearchCatCacheList(CatCache *cache,
         * Note: it's okay to use dlist_foreach here, even though we modify the
         * dlist within the loop, because we don't continue the loop afterwards.
         */
-       dlist_foreach(iter, &cache->cc_lists)
+       lbucket = &cache->cc_lbucket[lHashIndex];
+       dlist_foreach(iter, lbucket)
        {
                cl = dlist_container(CatCList, cache_elem, iter.cur);
 
@@ -1650,13 +1742,13 @@ SearchCatCacheList(CatCache *cache,
                        continue;
 
                /*
-                * We found a matching list.  Move the list to the front of the
-                * cache's list-of-lists, to speed subsequent searches.  (We do not
+                * We found a matching list.  Move the list to the front of the list
+                * for its hashbucket, so as to speed subsequent searches.  (We do not
                 * move the members to the fronts of their hashbucket lists, however,
                 * since there's no point in that unless they are searched for
                 * individually.)
                 */
-               dlist_move_head(&cache->cc_lists, &cl->cache_elem);
+               dlist_move_head(lbucket, &cl->cache_elem);
 
                /* Bump the list's refcount and return it */
                ResourceOwnerEnlarge(CurrentResourceOwner);
@@ -1868,7 +1960,12 @@ SearchCatCacheList(CatCache *cache,
        }
        Assert(i == nmembers);
 
-       dlist_push_head(&cache->cc_lists, &cl->cache_elem);
+       /*
+        * Add the CatCList to the appropriate bucket, and count it.
+        */
+       dlist_push_head(lbucket, &cl->cache_elem);
+
+       cache->cc_nlist++;
 
        /* Finally, bump the list's refcount and return it */
        cl->refcount++;
index a75a528de88ba3e59b2a2e6104feedf67e6777b1..3fb9647b87c871740ee8cb737a78d1d327f45009 100644 (file)
@@ -51,9 +51,11 @@ typedef struct catcache
        CCFastEqualFN cc_fastequal[CATCACHE_MAXKEYS];   /* fast equal function for
                                                                                                         * each key */
        int                     cc_keyno[CATCACHE_MAXKEYS]; /* AttrNumber of each key */
-       dlist_head      cc_lists;               /* list of CatCList structs */
-       int                     cc_ntup;                /* # of tuples currently in this cache */
        int                     cc_nkeys;               /* # of keys (1..CATCACHE_MAXKEYS) */
+       int                     cc_ntup;                /* # of tuples currently in this cache */
+       int                     cc_nlist;               /* # of CatCLists currently in this cache */
+       int                     cc_nlbuckets;   /* # of CatCList hash buckets in this cache */
+       dlist_head *cc_lbucket;         /* hash buckets for CatCLists */
        const char *cc_relname;         /* name of relation the tuples come from */
        Oid                     cc_reloid;              /* OID of relation the tuples come from */
        Oid                     cc_indexoid;    /* OID of index matching cache keys */