LCOV - code coverage report
Current view: top level - src/backend/utils/mmgr - dsa.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 514 664 77.4 %
Date: 2025-07-07 17:17:41 Functions: 34 38 89.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dsa.c
       4             :  *    Dynamic shared memory areas.
       5             :  *
       6             :  * This module provides dynamic shared memory areas which are built on top of
       7             :  * DSM segments.  While dsm.c allows segments of memory of shared memory to be
       8             :  * created and shared between backends, it isn't designed to deal with small
       9             :  * objects.  A DSA area is a shared memory heap usually backed by one or more
      10             :  * DSM segments which can allocate memory using dsa_allocate() and dsa_free().
      11             :  * Alternatively, it can be created in pre-existing shared memory, including a
      12             :  * DSM segment, and then create extra DSM segments as required.  Unlike the
      13             :  * regular system heap, it deals in pseudo-pointers which must be converted to
      14             :  * backend-local pointers before they are dereferenced.  These pseudo-pointers
      15             :  * can however be shared with other backends, and can be used to construct
      16             :  * shared data structures.
      17             :  *
      18             :  * Each DSA area manages a set of DSM segments, adding new segments as
      19             :  * required and detaching them when they are no longer needed.  Each segment
      20             :  * contains a number of 4KB pages, a free page manager for tracking
      21             :  * consecutive runs of free pages, and a page map for tracking the source of
      22             :  * objects allocated on each page.  Allocation requests above 8KB are handled
      23             :  * by choosing a segment and finding consecutive free pages in its free page
      24             :  * manager.  Allocation requests for smaller sizes are handled using pools of
      25             :  * objects of a selection of sizes.  Each pool consists of a number of 16 page
      26             :  * (64KB) superblocks allocated in the same way as large objects.  Allocation
      27             :  * of large objects and new superblocks is serialized by a single LWLock, but
      28             :  * allocation of small objects from pre-existing superblocks uses one LWLock
      29             :  * per pool.  Currently there is one pool, and therefore one lock, per size
      30             :  * class.  Per-core pools to increase concurrency and strategies for reducing
      31             :  * the resulting fragmentation are areas for future research.  Each superblock
      32             :  * is managed with a 'span', which tracks the superblock's freelist.  Free
      33             :  * requests are handled by looking in the page map to find which span an
      34             :  * address was allocated from, so that small objects can be returned to the
      35             :  * appropriate free list, and large object pages can be returned directly to
      36             :  * the free page map.  When allocating, simple heuristics for selecting
      37             :  * segments and superblocks try to encourage occupied memory to be
      38             :  * concentrated, increasing the likelihood that whole superblocks can become
      39             :  * empty and be returned to the free page manager, and whole segments can
      40             :  * become empty and be returned to the operating system.
      41             :  *
      42             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      43             :  * Portions Copyright (c) 1994, Regents of the University of California
      44             :  *
      45             :  * IDENTIFICATION
      46             :  *    src/backend/utils/mmgr/dsa.c
      47             :  *
      48             :  *-------------------------------------------------------------------------
      49             :  */
      50             : 
      51             : #include "postgres.h"
      52             : 
      53             : #include "port/atomics.h"
      54             : #include "port/pg_bitutils.h"
      55             : #include "storage/dsm.h"
      56             : #include "storage/lwlock.h"
      57             : #include "utils/dsa.h"
      58             : #include "utils/freepage.h"
      59             : #include "utils/memutils.h"
      60             : #include "utils/resowner.h"
      61             : 
      62             : /*
      63             :  * How many segments to create before we double the segment size.  If this is
      64             :  * low, then there is likely to be a lot of wasted space in the largest
      65             :  * segment.  If it is high, then we risk running out of segment slots (see
      66             :  * dsm.c's limits on total number of segments), or limiting the total size
      67             :  * an area can manage when using small pointers.
      68             :  */
      69             : #define DSA_NUM_SEGMENTS_AT_EACH_SIZE 2
      70             : 
      71             : /*
      72             :  * The maximum number of DSM segments that an area can own, determined by
      73             :  * the number of bits remaining (but capped at 1024).
      74             :  */
      75             : #define DSA_MAX_SEGMENTS \
      76             :     Min(1024, (1 << ((SIZEOF_DSA_POINTER * 8) - DSA_OFFSET_WIDTH)))
      77             : 
      78             : /* The bitmask for extracting the offset from a dsa_pointer. */
      79             : #define DSA_OFFSET_BITMASK (((dsa_pointer) 1 << DSA_OFFSET_WIDTH) - 1)
      80             : 
      81             : /* Number of pages (see FPM_PAGE_SIZE) per regular superblock. */
      82             : #define DSA_PAGES_PER_SUPERBLOCK        16
      83             : 
      84             : /*
      85             :  * A magic number used as a sanity check for following DSM segments belonging
      86             :  * to a DSA area (this number will be XORed with the area handle and
      87             :  * the segment index).
      88             :  */
      89             : #define DSA_SEGMENT_HEADER_MAGIC 0x0ce26608
      90             : 
      91             : /* Build a dsa_pointer given a segment number and offset. */
      92             : #define DSA_MAKE_POINTER(segment_number, offset) \
      93             :     (((dsa_pointer) (segment_number) << DSA_OFFSET_WIDTH) | (offset))
      94             : 
      95             : /* Extract the segment number from a dsa_pointer. */
      96             : #define DSA_EXTRACT_SEGMENT_NUMBER(dp) ((dp) >> DSA_OFFSET_WIDTH)
      97             : 
      98             : /* Extract the offset from a dsa_pointer. */
      99             : #define DSA_EXTRACT_OFFSET(dp) ((dp) & DSA_OFFSET_BITMASK)
     100             : 
     101             : /* The type used for index segment indexes (zero based). */
     102             : typedef size_t dsa_segment_index;
     103             : 
     104             : /* Sentinel value for dsa_segment_index indicating 'none' or 'end'. */
     105             : #define DSA_SEGMENT_INDEX_NONE (~(dsa_segment_index)0)
     106             : 
     107             : /*
     108             :  * How many bins of segments do we have?  The bins are used to categorize
     109             :  * segments by their largest contiguous run of free pages.
     110             :  */
     111             : #define DSA_NUM_SEGMENT_BINS 16
     112             : 
     113             : /*
     114             :  * What is the lowest bin that holds segments that *might* have n contiguous
     115             :  * free pages?  There is no point in looking in segments in lower bins; they
     116             :  * definitely can't service a request for n free pages.
     117             :  */
     118             : static inline size_t
     119       42680 : contiguous_pages_to_segment_bin(size_t n)
     120             : {
     121             :     size_t      bin;
     122             : 
     123       42680 :     if (n == 0)
     124        1436 :         bin = 0;
     125             :     else
     126       41244 :         bin = pg_leftmost_one_pos_size_t(n) + 1;
     127             : 
     128       42680 :     return Min(bin, DSA_NUM_SEGMENT_BINS - 1);
     129             : }
     130             : 
     131             : /* Macros for access to locks. */
     132             : #define DSA_AREA_LOCK(area) (&area->control->lock)
     133             : #define DSA_SCLASS_LOCK(area, sclass) (&area->control->pools[sclass].lock)
     134             : 
     135             : /*
     136             :  * The header for an individual segment.  This lives at the start of each DSM
     137             :  * segment owned by a DSA area including the first segment (where it appears
     138             :  * as part of the dsa_area_control struct).
     139             :  */
     140             : typedef struct
     141             : {
     142             :     /* Sanity check magic value. */
     143             :     uint32      magic;
     144             :     /* Total number of pages in this segment (excluding metadata area). */
     145             :     size_t      usable_pages;
     146             :     /* Total size of this segment in bytes. */
     147             :     size_t      size;
     148             : 
     149             :     /*
     150             :      * Index of the segment that precedes this one in the same segment bin, or
     151             :      * DSA_SEGMENT_INDEX_NONE if this is the first one.
     152             :      */
     153             :     dsa_segment_index prev;
     154             : 
     155             :     /*
     156             :      * Index of the segment that follows this one in the same segment bin, or
     157             :      * DSA_SEGMENT_INDEX_NONE if this is the last one.
     158             :      */
     159             :     dsa_segment_index next;
     160             :     /* The index of the bin that contains this segment. */
     161             :     size_t      bin;
     162             : 
     163             :     /*
     164             :      * A flag raised to indicate that this segment is being returned to the
     165             :      * operating system and has been unpinned.
     166             :      */
     167             :     bool        freed;
     168             : } dsa_segment_header;
     169             : 
     170             : /*
     171             :  * Metadata for one superblock.
     172             :  *
     173             :  * For most blocks, span objects are stored out-of-line; that is, the span
     174             :  * object is not stored within the block itself.  But, as an exception, for a
     175             :  * "span of spans", the span object is stored "inline".  The allocation is
     176             :  * always exactly one page, and the dsa_area_span object is located at
     177             :  * the beginning of that page.  The size class is DSA_SCLASS_BLOCK_OF_SPANS,
     178             :  * and the remaining fields are used just as they would be in an ordinary
     179             :  * block.  We can't allocate spans out of ordinary superblocks because
     180             :  * creating an ordinary superblock requires us to be able to allocate a span
     181             :  * *first*.  Doing it this way avoids that circularity.
     182             :  */
     183             : typedef struct
     184             : {
     185             :     dsa_pointer pool;           /* Containing pool. */
     186             :     dsa_pointer prevspan;       /* Previous span. */
     187             :     dsa_pointer nextspan;       /* Next span. */
     188             :     dsa_pointer start;          /* Starting address. */
     189             :     size_t      npages;         /* Length of span in pages. */
     190             :     uint16      size_class;     /* Size class. */
     191             :     uint16      ninitialized;   /* Maximum number of objects ever allocated. */
     192             :     uint16      nallocatable;   /* Number of objects currently allocatable. */
     193             :     uint16      firstfree;      /* First object on free list. */
     194             :     uint16      nmax;           /* Maximum number of objects ever possible. */
     195             :     uint16      fclass;         /* Current fullness class. */
     196             : } dsa_area_span;
     197             : 
     198             : /*
     199             :  * Given a pointer to an object in a span, access the index of the next free
     200             :  * object in the same span (ie in the span's freelist) as an L-value.
     201             :  */
     202             : #define NextFreeObjectIndex(object) (* (uint16 *) (object))
     203             : 
     204             : /*
     205             :  * Small allocations are handled by dividing a single block of memory into
     206             :  * many small objects of equal size.  The possible allocation sizes are
     207             :  * defined by the following array.  Larger size classes are spaced more widely
     208             :  * than smaller size classes.  We fudge the spacing for size classes >1kB to
     209             :  * avoid space wastage: based on the knowledge that we plan to allocate 64kB
     210             :  * blocks, we bump the maximum object size up to the largest multiple of
     211             :  * 8 bytes that still lets us fit the same number of objects into one block.
     212             :  *
     213             :  * NB: Because of this fudging, if we were ever to use differently-sized blocks
     214             :  * for small allocations, these size classes would need to be reworked to be
     215             :  * optimal for the new size.
     216             :  *
     217             :  * NB: The optimal spacing for size classes, as well as the size of the blocks
     218             :  * out of which small objects are allocated, is not a question that has one
     219             :  * right answer.  Some allocators (such as tcmalloc) use more closely-spaced
     220             :  * size classes than we do here, while others (like aset.c) use more
     221             :  * widely-spaced classes.  Spacing the classes more closely avoids wasting
     222             :  * memory within individual chunks, but also means a larger number of
     223             :  * potentially-unfilled blocks.
     224             :  */
     225             : static const uint16 dsa_size_classes[] = {
     226             :     sizeof(dsa_area_span), 0,   /* special size classes */
     227             :     8, 16, 24, 32, 40, 48, 56, 64,  /* 8 classes separated by 8 bytes */
     228             :     80, 96, 112, 128,           /* 4 classes separated by 16 bytes */
     229             :     160, 192, 224, 256,         /* 4 classes separated by 32 bytes */
     230             :     320, 384, 448, 512,         /* 4 classes separated by 64 bytes */
     231             :     640, 768, 896, 1024,        /* 4 classes separated by 128 bytes */
     232             :     1280, 1560, 1816, 2048,     /* 4 classes separated by ~256 bytes */
     233             :     2616, 3120, 3640, 4096,     /* 4 classes separated by ~512 bytes */
     234             :     5456, 6552, 7280, 8192      /* 4 classes separated by ~1024 bytes */
     235             : };
     236             : #define DSA_NUM_SIZE_CLASSES                lengthof(dsa_size_classes)
     237             : 
     238             : /* Special size classes. */
     239             : #define DSA_SCLASS_BLOCK_OF_SPANS       0
     240             : #define DSA_SCLASS_SPAN_LARGE           1
     241             : 
     242             : /*
     243             :  * The following lookup table is used to map the size of small objects
     244             :  * (less than 1kB) onto the corresponding size class.  To use this table,
     245             :  * round the size of the object up to the next multiple of 8 bytes, and then
     246             :  * index into this array.
     247             :  */
     248             : static const uint8 dsa_size_class_map[] = {
     249             :     2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 11, 11, 12, 12, 13, 13,
     250             :     14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17,
     251             :     18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
     252             :     20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21,
     253             :     22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
     254             :     23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
     255             :     24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
     256             :     25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25
     257             : };
     258             : #define DSA_SIZE_CLASS_MAP_QUANTUM  8
     259             : 
     260             : /*
     261             :  * Superblocks are binned by how full they are.  Generally, each fullness
     262             :  * class corresponds to one quartile, but the block being used for
     263             :  * allocations is always at the head of the list for fullness class 1,
     264             :  * regardless of how full it really is.
     265             :  */
     266             : #define DSA_FULLNESS_CLASSES        4
     267             : 
     268             : /*
     269             :  * A dsa_area_pool represents a set of objects of a given size class.
     270             :  *
     271             :  * Perhaps there should be multiple pools for the same size class for
     272             :  * contention avoidance, but for now there is just one!
     273             :  */
     274             : typedef struct
     275             : {
     276             :     /* A lock protecting access to this pool. */
     277             :     LWLock      lock;
     278             :     /* A set of linked lists of spans, arranged by fullness. */
     279             :     dsa_pointer spans[DSA_FULLNESS_CLASSES];
     280             :     /* Should we pad this out to a cacheline boundary? */
     281             : } dsa_area_pool;
     282             : 
     283             : /*
     284             :  * The control block for an area.  This lives in shared memory, at the start of
     285             :  * the first DSM segment controlled by this area.
     286             :  */
     287             : typedef struct
     288             : {
     289             :     /* The segment header for the first segment. */
     290             :     dsa_segment_header segment_header;
     291             :     /* The handle for this area. */
     292             :     dsa_handle  handle;
     293             :     /* The handles of the segments owned by this area. */
     294             :     dsm_handle  segment_handles[DSA_MAX_SEGMENTS];
     295             :     /* Lists of segments, binned by maximum contiguous run of free pages. */
     296             :     dsa_segment_index segment_bins[DSA_NUM_SEGMENT_BINS];
     297             :     /* The object pools for each size class. */
     298             :     dsa_area_pool pools[DSA_NUM_SIZE_CLASSES];
     299             :     /* initial allocation segment size */
     300             :     size_t      init_segment_size;
     301             :     /* maximum allocation segment size */
     302             :     size_t      max_segment_size;
     303             :     /* The total size of all active segments. */
     304             :     size_t      total_segment_size;
     305             :     /* The maximum total size of backing storage we are allowed. */
     306             :     size_t      max_total_segment_size;
     307             :     /* Highest used segment index in the history of this area. */
     308             :     dsa_segment_index high_segment_index;
     309             :     /* The reference count for this area. */
     310             :     int         refcnt;
     311             :     /* A flag indicating that this area has been pinned. */
     312             :     bool        pinned;
     313             :     /* The number of times that segments have been freed. */
     314             :     size_t      freed_segment_counter;
     315             :     /* The LWLock tranche ID. */
     316             :     int         lwlock_tranche_id;
     317             :     /* The general lock (protects everything except object pools). */
     318             :     LWLock      lock;
     319             : } dsa_area_control;
     320             : 
     321             : /* Given a pointer to a pool, find a dsa_pointer. */
     322             : #define DsaAreaPoolToDsaPointer(area, p)    \
     323             :     DSA_MAKE_POINTER(0, (char *) p - (char *) area->control)
     324             : 
     325             : /*
     326             :  * A dsa_segment_map is stored within the backend-private memory of each
     327             :  * individual backend.  It holds the base address of the segment within that
     328             :  * backend, plus the addresses of key objects within the segment.  Those
     329             :  * could instead be derived from the base address but it's handy to have them
     330             :  * around.
     331             :  */
     332             : typedef struct
     333             : {
     334             :     dsm_segment *segment;       /* DSM segment */
     335             :     char       *mapped_address; /* Address at which segment is mapped */
     336             :     dsa_segment_header *header; /* Header (same as mapped_address) */
     337             :     FreePageManager *fpm;       /* Free page manager within segment. */
     338             :     dsa_pointer *pagemap;       /* Page map within segment. */
     339             : } dsa_segment_map;
     340             : 
     341             : /*
     342             :  * Per-backend state for a storage area.  Backends obtain one of these by
     343             :  * creating an area or attaching to an existing one using a handle.  Each
     344             :  * process that needs to use an area uses its own object to track where the
     345             :  * segments are mapped.
     346             :  */
     347             : struct dsa_area
     348             : {
     349             :     /* Pointer to the control object in shared memory. */
     350             :     dsa_area_control *control;
     351             : 
     352             :     /*
     353             :      * All the mappings are owned by this.  The dsa_area itself is not
     354             :      * directly tracked by the ResourceOwner, but the effect is the same. NULL
     355             :      * if the attachment has session lifespan, i.e if dsa_pin_mapping() has
     356             :      * been called.
     357             :      */
     358             :     ResourceOwner resowner;
     359             : 
     360             :     /*
     361             :      * This backend's array of segment maps, ordered by segment index
     362             :      * corresponding to control->segment_handles.  Some of the area's segments
     363             :      * may not be mapped in this backend yet, and some slots may have been
     364             :      * freed and need to be detached; these operations happen on demand.
     365             :      */
     366             :     dsa_segment_map segment_maps[DSA_MAX_SEGMENTS];
     367             : 
     368             :     /* The highest segment index this backend has ever mapped. */
     369             :     dsa_segment_index high_segment_index;
     370             : 
     371             :     /* The last observed freed_segment_counter. */
     372             :     size_t      freed_segment_counter;
     373             : };
     374             : 
     375             : #define DSA_SPAN_NOTHING_FREE   ((uint16) -1)
     376             : #define DSA_SUPERBLOCK_SIZE (DSA_PAGES_PER_SUPERBLOCK * FPM_PAGE_SIZE)
     377             : 
     378             : /* Given a pointer to a segment_map, obtain a segment index number. */
     379             : #define get_segment_index(area, segment_map_ptr) \
     380             :     (segment_map_ptr - &area->segment_maps[0])
     381             : 
     382             : static void init_span(dsa_area *area, dsa_pointer span_pointer,
     383             :                       dsa_area_pool *pool, dsa_pointer start, size_t npages,
     384             :                       uint16 size_class);
     385             : static bool transfer_first_span(dsa_area *area, dsa_area_pool *pool,
     386             :                                 int fromclass, int toclass);
     387             : static inline dsa_pointer alloc_object(dsa_area *area, int size_class);
     388             : static bool ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
     389             :                                      int size_class);
     390             : static dsa_segment_map *get_segment_by_index(dsa_area *area,
     391             :                                              dsa_segment_index index);
     392             : static void destroy_superblock(dsa_area *area, dsa_pointer span_pointer);
     393             : static void unlink_span(dsa_area *area, dsa_area_span *span);
     394             : static void add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
     395             :                                        dsa_pointer span_pointer, int fclass);
     396             : static void unlink_segment(dsa_area *area, dsa_segment_map *segment_map);
     397             : static dsa_segment_map *get_best_segment(dsa_area *area, size_t npages);
     398             : static dsa_segment_map *make_new_segment(dsa_area *area, size_t requested_pages);
     399             : static dsa_area *create_internal(void *place, size_t size,
     400             :                                  int tranche_id,
     401             :                                  dsm_handle control_handle,
     402             :                                  dsm_segment *control_segment,
     403             :                                  size_t init_segment_size,
     404             :                                  size_t max_segment_size);
     405             : static dsa_area *attach_internal(void *place, dsm_segment *segment,
     406             :                                  dsa_handle handle);
     407             : static void check_for_freed_segments(dsa_area *area);
     408             : static void check_for_freed_segments_locked(dsa_area *area);
     409             : static void rebin_segment(dsa_area *area, dsa_segment_map *segment_map);
     410             : 
     411             : /*
     412             :  * Create a new shared area in a new DSM segment.  Further DSM segments will
     413             :  * be allocated as required to extend the available space.
     414             :  *
     415             :  * We can't allocate a LWLock tranche_id within this function, because tranche
     416             :  * IDs are a scarce resource; there are only 64k available, using low numbers
     417             :  * when possible matters, and we have no provision for recycling them.  So,
     418             :  * we require the caller to provide one.
     419             :  */
     420             : dsa_area *
     421         178 : dsa_create_ext(int tranche_id, size_t init_segment_size, size_t max_segment_size)
     422             : {
     423             :     dsm_segment *segment;
     424             :     dsa_area   *area;
     425             : 
     426             :     /*
     427             :      * Create the DSM segment that will hold the shared control object and the
     428             :      * first segment of usable space.
     429             :      */
     430         178 :     segment = dsm_create(init_segment_size, 0);
     431             : 
     432             :     /*
     433             :      * All segments backing this area are pinned, so that DSA can explicitly
     434             :      * control their lifetime (otherwise a newly created segment belonging to
     435             :      * this area might be freed when the only backend that happens to have it
     436             :      * mapped in ends, corrupting the area).
     437             :      */
     438         178 :     dsm_pin_segment(segment);
     439             : 
     440             :     /* Create a new DSA area with the control object in this segment. */
     441         178 :     area = create_internal(dsm_segment_address(segment),
     442             :                            init_segment_size,
     443             :                            tranche_id,
     444             :                            dsm_segment_handle(segment), segment,
     445             :                            init_segment_size, max_segment_size);
     446             : 
     447             :     /* Clean up when the control segment detaches. */
     448         178 :     on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
     449         178 :                   PointerGetDatum(dsm_segment_address(segment)));
     450             : 
     451         178 :     return area;
     452             : }
     453             : 
     454             : /*
     455             :  * Create a new shared area in an existing shared memory space, which may be
     456             :  * either DSM or Postmaster-initialized memory.  DSM segments will be
     457             :  * allocated as required to extend the available space, though that can be
     458             :  * prevented with dsa_set_size_limit(area, size) using the same size provided
     459             :  * to dsa_create_in_place.
     460             :  *
     461             :  * Areas created in-place must eventually be released by the backend that
     462             :  * created them and all backends that attach to them.  This can be done
     463             :  * explicitly with dsa_release_in_place, or, in the special case that 'place'
     464             :  * happens to be in a pre-existing DSM segment, by passing in a pointer to the
     465             :  * segment so that a detach hook can be registered with the containing DSM
     466             :  * segment.
     467             :  *
     468             :  * See dsa_create() for a note about the tranche arguments.
     469             :  */
     470             : dsa_area *
     471        2990 : dsa_create_in_place_ext(void *place, size_t size,
     472             :                         int tranche_id, dsm_segment *segment,
     473             :                         size_t init_segment_size, size_t max_segment_size)
     474             : {
     475             :     dsa_area   *area;
     476             : 
     477        2990 :     area = create_internal(place, size, tranche_id,
     478             :                            DSM_HANDLE_INVALID, NULL,
     479             :                            init_segment_size, max_segment_size);
     480             : 
     481             :     /*
     482             :      * Clean up when the control segment detaches, if a containing DSM segment
     483             :      * was provided.
     484             :      */
     485        2990 :     if (segment != NULL)
     486         856 :         on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
     487             :                       PointerGetDatum(place));
     488             : 
     489        2990 :     return area;
     490             : }
     491             : 
     492             : /*
     493             :  * Obtain a handle that can be passed to other processes so that they can
     494             :  * attach to the given area.  Cannot be called for areas created with
     495             :  * dsa_create_in_place.
     496             :  */
     497             : dsa_handle
     498         172 : dsa_get_handle(dsa_area *area)
     499             : {
     500             :     Assert(area->control->handle != DSA_HANDLE_INVALID);
     501         172 :     return area->control->handle;
     502             : }
     503             : 
     504             : /*
     505             :  * Attach to an area given a handle generated (possibly in another process) by
     506             :  * dsa_get_handle.  The area must have been created with dsa_create (not
     507             :  * dsa_create_in_place).
     508             :  */
     509             : dsa_area *
     510         332 : dsa_attach(dsa_handle handle)
     511             : {
     512             :     dsm_segment *segment;
     513             :     dsa_area   *area;
     514             : 
     515             :     /*
     516             :      * An area handle is really a DSM segment handle for the first segment, so
     517             :      * we go ahead and attach to that.
     518             :      */
     519         332 :     segment = dsm_attach(handle);
     520         332 :     if (segment == NULL)
     521           0 :         ereport(ERROR,
     522             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     523             :                  errmsg("could not attach to dynamic shared area")));
     524             : 
     525         332 :     area = attach_internal(dsm_segment_address(segment), segment, handle);
     526             : 
     527             :     /* Clean up when the control segment detaches. */
     528         332 :     on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
     529         332 :                   PointerGetDatum(dsm_segment_address(segment)));
     530             : 
     531         332 :     return area;
     532             : }
     533             : 
     534             : /*
     535             :  * Returns whether the area with the given handle was already attached by the
     536             :  * current process.  The area must have been created with dsa_create (not
     537             :  * dsa_create_in_place).
     538             :  */
     539             : bool
     540           4 : dsa_is_attached(dsa_handle handle)
     541             : {
     542             :     /*
     543             :      * An area handle is really a DSM segment handle for the first segment, so
     544             :      * we can just search for that.
     545             :      */
     546           4 :     return dsm_find_mapping(handle) != NULL;
     547             : }
     548             : 
     549             : /*
     550             :  * Attach to an area that was created with dsa_create_in_place.  The caller
     551             :  * must somehow know the location in memory that was used when the area was
     552             :  * created, though it may be mapped at a different virtual address in this
     553             :  * process.
     554             :  *
     555             :  * See dsa_create_in_place for note about releasing in-place areas, and the
     556             :  * optional 'segment' argument which can be provided to allow automatic
     557             :  * release if the containing memory happens to be a DSM segment.
     558             :  */
     559             : dsa_area *
     560       47698 : dsa_attach_in_place(void *place, dsm_segment *segment)
     561             : {
     562             :     dsa_area   *area;
     563             : 
     564       47698 :     area = attach_internal(place, NULL, DSA_HANDLE_INVALID);
     565             : 
     566             :     /*
     567             :      * Clean up when the control segment detaches, if a containing DSM segment
     568             :      * was provided.
     569             :      */
     570       47698 :     if (segment != NULL)
     571        5258 :         on_dsm_detach(segment, &dsa_on_dsm_detach_release_in_place,
     572             :                       PointerGetDatum(place));
     573             : 
     574       47698 :     return area;
     575             : }
     576             : 
     577             : /*
     578             :  * Release a DSA area that was produced by dsa_create_in_place or
     579             :  * dsa_attach_in_place.  The 'segment' argument is ignored but provides an
     580             :  * interface suitable for on_dsm_detach, for the convenience of users who want
     581             :  * to create a DSA segment inside an existing DSM segment and have it
     582             :  * automatically released when the containing DSM segment is detached.
     583             :  * 'place' should be the address of the place where the area was created.
     584             :  *
     585             :  * This callback is automatically registered for the DSM segment containing
     586             :  * the control object of in-place areas when a segment is provided to
     587             :  * dsa_create_in_place or dsa_attach_in_place, and also for all areas created
     588             :  * with dsa_create.
     589             :  */
     590             : void
     591        6624 : dsa_on_dsm_detach_release_in_place(dsm_segment *segment, Datum place)
     592             : {
     593        6624 :     dsa_release_in_place(DatumGetPointer(place));
     594        6624 : }
     595             : 
     596             : /*
     597             :  * Release a DSA area that was produced by dsa_create_in_place or
     598             :  * dsa_attach_in_place.  The 'code' argument is ignored but provides an
     599             :  * interface suitable for on_shmem_exit or before_shmem_exit, for the
     600             :  * convenience of users who want to create a DSA segment inside shared memory
     601             :  * other than a DSM segment and have it automatically release at backend exit.
     602             :  * 'place' should be the address of the place where the area was created.
     603             :  */
     604             : void
     605           0 : dsa_on_shmem_exit_release_in_place(int code, Datum place)
     606             : {
     607           0 :     dsa_release_in_place(DatumGetPointer(place));
     608           0 : }
     609             : 
     610             : /*
     611             :  * Release a DSA area that was produced by dsa_create_in_place or
     612             :  * dsa_attach_in_place.  It is preferable to use one of the 'dsa_on_XXX'
     613             :  * callbacks so that this is managed automatically, because failure to release
     614             :  * an area created in-place leaks its segments permanently.
     615             :  *
     616             :  * This is also called automatically for areas produced by dsa_create or
     617             :  * dsa_attach as an implementation detail.
     618             :  */
     619             : void
     620       49064 : dsa_release_in_place(void *place)
     621             : {
     622       49064 :     dsa_area_control *control = (dsa_area_control *) place;
     623             :     int         i;
     624             : 
     625       49064 :     LWLockAcquire(&control->lock, LW_EXCLUSIVE);
     626             :     Assert(control->segment_header.magic ==
     627             :            (DSA_SEGMENT_HEADER_MAGIC ^ control->handle ^ 0));
     628             :     Assert(control->refcnt > 0);
     629       49064 :     if (--control->refcnt == 0)
     630             :     {
     631        2070 :         for (i = 0; i <= control->high_segment_index; ++i)
     632             :         {
     633             :             dsm_handle  handle;
     634             : 
     635        1164 :             handle = control->segment_handles[i];
     636        1164 :             if (handle != DSM_HANDLE_INVALID)
     637         308 :                 dsm_unpin_segment(handle);
     638             :         }
     639             :     }
     640       49064 :     LWLockRelease(&control->lock);
     641       49064 : }
     642             : 
     643             : /*
     644             :  * Keep a DSA area attached until end of session or explicit detach.
     645             :  *
     646             :  * By default, areas are owned by the current resource owner, which means they
     647             :  * are detached automatically when that scope ends.
     648             :  */
     649             : void
     650       45726 : dsa_pin_mapping(dsa_area *area)
     651             : {
     652             :     int         i;
     653             : 
     654       45726 :     if (area->resowner != NULL)
     655             :     {
     656        3158 :         area->resowner = NULL;
     657             : 
     658        6342 :         for (i = 0; i <= area->high_segment_index; ++i)
     659        3184 :             if (area->segment_maps[i].segment != NULL)
     660         314 :                 dsm_pin_mapping(area->segment_maps[i].segment);
     661             :     }
     662       45726 : }
     663             : 
     664             : /*
     665             :  * Allocate memory in this storage area.  The return value is a dsa_pointer
     666             :  * that can be passed to other processes, and converted to a local pointer
     667             :  * with dsa_get_address.  'flags' is a bitmap which should be constructed
     668             :  * from the following values:
     669             :  *
     670             :  * DSA_ALLOC_HUGE allows allocations >= 1GB.  Otherwise, such allocations
     671             :  * will result in an ERROR.
     672             :  *
     673             :  * DSA_ALLOC_NO_OOM causes this function to return InvalidDsaPointer when
     674             :  * no memory is available or a size limit established by dsa_set_size_limit
     675             :  * would be exceeded.  Otherwise, such allocations will result in an ERROR.
     676             :  *
     677             :  * DSA_ALLOC_ZERO causes the allocated memory to be zeroed.  Otherwise, the
     678             :  * contents of newly-allocated memory are indeterminate.
     679             :  *
     680             :  * These flags correspond to similarly named flags used by
     681             :  * MemoryContextAllocExtended().  See also the macros dsa_allocate and
     682             :  * dsa_allocate0 which expand to a call to this function with commonly used
     683             :  * flags.
     684             :  */
     685             : dsa_pointer
     686     1285740 : dsa_allocate_extended(dsa_area *area, size_t size, int flags)
     687             : {
     688             :     uint16      size_class;
     689             :     dsa_pointer start_pointer;
     690             :     dsa_segment_map *segment_map;
     691             :     dsa_pointer result;
     692             : 
     693             :     Assert(size > 0);
     694             : 
     695             :     /* Sanity check on huge individual allocation size. */
     696     1285740 :     if (((flags & DSA_ALLOC_HUGE) != 0 && !AllocHugeSizeIsValid(size)) ||
     697     1285740 :         ((flags & DSA_ALLOC_HUGE) == 0 && !AllocSizeIsValid(size)))
     698           0 :         elog(ERROR, "invalid DSA memory alloc request size %zu", size);
     699             : 
     700             :     /*
     701             :      * If bigger than the largest size class, just grab a run of pages from
     702             :      * the free page manager, instead of allocating an object from a pool.
     703             :      * There will still be a span, but it's a special class of span that
     704             :      * manages this whole allocation and simply gives all pages back to the
     705             :      * free page manager when dsa_free is called.
     706             :      */
     707     1285740 :     if (size > dsa_size_classes[lengthof(dsa_size_classes) - 1])
     708             :     {
     709        5500 :         size_t      npages = fpm_size_to_pages(size);
     710             :         size_t      first_page;
     711             :         dsa_pointer span_pointer;
     712        5500 :         dsa_area_pool *pool = &area->control->pools[DSA_SCLASS_SPAN_LARGE];
     713             : 
     714             :         /* Obtain a span object. */
     715        5500 :         span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
     716        5500 :         if (!DsaPointerIsValid(span_pointer))
     717             :         {
     718             :             /* Raise error unless asked not to. */
     719           0 :             if ((flags & DSA_ALLOC_NO_OOM) == 0)
     720           0 :                 ereport(ERROR,
     721             :                         (errcode(ERRCODE_OUT_OF_MEMORY),
     722             :                          errmsg("out of memory"),
     723             :                          errdetail("Failed on DSA request of size %zu.",
     724             :                                    size)));
     725           0 :             return InvalidDsaPointer;
     726             :         }
     727             : 
     728        5500 :         LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
     729             : 
     730             :         /* Find a segment from which to allocate. */
     731        5500 :         segment_map = get_best_segment(area, npages);
     732        5500 :         if (segment_map == NULL)
     733          46 :             segment_map = make_new_segment(area, npages);
     734        5500 :         if (segment_map == NULL)
     735             :         {
     736             :             /* Can't make any more segments: game over. */
     737           0 :             LWLockRelease(DSA_AREA_LOCK(area));
     738           0 :             dsa_free(area, span_pointer);
     739             : 
     740             :             /* Raise error unless asked not to. */
     741           0 :             if ((flags & DSA_ALLOC_NO_OOM) == 0)
     742           0 :                 ereport(ERROR,
     743             :                         (errcode(ERRCODE_OUT_OF_MEMORY),
     744             :                          errmsg("out of memory"),
     745             :                          errdetail("Failed on DSA request of size %zu.",
     746             :                                    size)));
     747           0 :             return InvalidDsaPointer;
     748             :         }
     749             : 
     750             :         /*
     751             :          * Ask the free page manager for a run of pages.  This should always
     752             :          * succeed, since both get_best_segment and make_new_segment should
     753             :          * only return a non-NULL pointer if it actually contains enough
     754             :          * contiguous freespace.  If it does fail, something in our backend
     755             :          * private state is out of whack, so use FATAL to kill the process.
     756             :          */
     757        5500 :         if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
     758           0 :             elog(FATAL,
     759             :                  "dsa_allocate could not find %zu free pages", npages);
     760        5500 :         LWLockRelease(DSA_AREA_LOCK(area));
     761             : 
     762        5500 :         start_pointer = DSA_MAKE_POINTER(get_segment_index(area, segment_map),
     763             :                                          first_page * FPM_PAGE_SIZE);
     764             : 
     765             :         /* Initialize span and pagemap. */
     766        5500 :         LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
     767             :                       LW_EXCLUSIVE);
     768        5500 :         init_span(area, span_pointer, pool, start_pointer, npages,
     769             :                   DSA_SCLASS_SPAN_LARGE);
     770        5500 :         segment_map->pagemap[first_page] = span_pointer;
     771        5500 :         LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));
     772             : 
     773             :         /* Zero-initialize the memory if requested. */
     774        5500 :         if ((flags & DSA_ALLOC_ZERO) != 0)
     775        1480 :             memset(dsa_get_address(area, start_pointer), 0, size);
     776             : 
     777        5500 :         return start_pointer;
     778             :     }
     779             : 
     780             :     /* Map allocation to a size class. */
     781     1280240 :     if (size < lengthof(dsa_size_class_map) * DSA_SIZE_CLASS_MAP_QUANTUM)
     782             :     {
     783             :         int         mapidx;
     784             : 
     785             :         /* For smaller sizes we have a lookup table... */
     786     1235272 :         mapidx = ((size + DSA_SIZE_CLASS_MAP_QUANTUM - 1) /
     787     1235272 :                   DSA_SIZE_CLASS_MAP_QUANTUM) - 1;
     788     1235272 :         size_class = dsa_size_class_map[mapidx];
     789             :     }
     790             :     else
     791             :     {
     792             :         uint16      min;
     793             :         uint16      max;
     794             : 
     795             :         /* ... and for the rest we search by binary chop. */
     796       44968 :         min = dsa_size_class_map[lengthof(dsa_size_class_map) - 1];
     797       44968 :         max = lengthof(dsa_size_classes) - 1;
     798             : 
     799      186016 :         while (min < max)
     800             :         {
     801      141048 :             uint16      mid = (min + max) / 2;
     802      141048 :             uint16      class_size = dsa_size_classes[mid];
     803             : 
     804      141048 :             if (class_size < size)
     805       84342 :                 min = mid + 1;
     806             :             else
     807       56706 :                 max = mid;
     808             :         }
     809             : 
     810       44968 :         size_class = min;
     811             :     }
     812             :     Assert(size <= dsa_size_classes[size_class]);
     813             :     Assert(size_class == 0 || size > dsa_size_classes[size_class - 1]);
     814             : 
     815             :     /* Attempt to allocate an object from the appropriate pool. */
     816     1280240 :     result = alloc_object(area, size_class);
     817             : 
     818             :     /* Check for failure to allocate. */
     819     1280240 :     if (!DsaPointerIsValid(result))
     820             :     {
     821             :         /* Raise error unless asked not to. */
     822           0 :         if ((flags & DSA_ALLOC_NO_OOM) == 0)
     823           0 :             ereport(ERROR,
     824             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     825             :                      errmsg("out of memory"),
     826             :                      errdetail("Failed on DSA request of size %zu.", size)));
     827           0 :         return InvalidDsaPointer;
     828             :     }
     829             : 
     830             :     /* Zero-initialize the memory if requested. */
     831     1280240 :     if ((flags & DSA_ALLOC_ZERO) != 0)
     832      631376 :         memset(dsa_get_address(area, result), 0, size);
     833             : 
     834     1280240 :     return result;
     835             : }
     836             : 
     837             : /*
     838             :  * Free memory obtained with dsa_allocate.
     839             :  */
     840             : void
     841      235822 : dsa_free(dsa_area *area, dsa_pointer dp)
     842             : {
     843             :     dsa_segment_map *segment_map;
     844             :     int         pageno;
     845             :     dsa_pointer span_pointer;
     846             :     dsa_area_span *span;
     847             :     char       *superblock;
     848             :     char       *object;
     849             :     size_t      size;
     850             :     int         size_class;
     851             : 
     852             :     /* Make sure we don't have a stale segment in the slot 'dp' refers to. */
     853      235822 :     check_for_freed_segments(area);
     854             : 
     855             :     /* Locate the object, span and pool. */
     856      235822 :     segment_map = get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(dp));
     857      235822 :     pageno = DSA_EXTRACT_OFFSET(dp) / FPM_PAGE_SIZE;
     858      235822 :     span_pointer = segment_map->pagemap[pageno];
     859      235822 :     span = dsa_get_address(area, span_pointer);
     860      235822 :     superblock = dsa_get_address(area, span->start);
     861      235822 :     object = dsa_get_address(area, dp);
     862      235822 :     size_class = span->size_class;
     863      235822 :     size = dsa_size_classes[size_class];
     864             : 
     865             :     /*
     866             :      * Special case for large objects that live in a special span: we return
     867             :      * those pages directly to the free page manager and free the span.
     868             :      */
     869      235822 :     if (span->size_class == DSA_SCLASS_SPAN_LARGE)
     870             :     {
     871             : 
     872             : #ifdef CLOBBER_FREED_MEMORY
     873             :         memset(object, 0x7f, span->npages * FPM_PAGE_SIZE);
     874             : #endif
     875             : 
     876             :         /* Give pages back to free page manager. */
     877        4226 :         LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
     878        4226 :         FreePageManagerPut(segment_map->fpm,
     879        4226 :                            DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
     880             :                            span->npages);
     881             : 
     882             :         /* Move segment to appropriate bin if necessary. */
     883        4226 :         rebin_segment(area, segment_map);
     884        4226 :         LWLockRelease(DSA_AREA_LOCK(area));
     885             : 
     886             :         /* Unlink span. */
     887        4226 :         LWLockAcquire(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE),
     888             :                       LW_EXCLUSIVE);
     889        4226 :         unlink_span(area, span);
     890        4226 :         LWLockRelease(DSA_SCLASS_LOCK(area, DSA_SCLASS_SPAN_LARGE));
     891             :         /* Free the span object so it can be reused. */
     892        4226 :         dsa_free(area, span_pointer);
     893        4226 :         return;
     894             :     }
     895             : 
     896             : #ifdef CLOBBER_FREED_MEMORY
     897             :     memset(object, 0x7f, size);
     898             : #endif
     899             : 
     900      231596 :     LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
     901             : 
     902             :     /* Put the object on the span's freelist. */
     903             :     Assert(object >= superblock);
     904             :     Assert(object < superblock + DSA_SUPERBLOCK_SIZE);
     905             :     Assert((object - superblock) % size == 0);
     906      231596 :     NextFreeObjectIndex(object) = span->firstfree;
     907      231596 :     span->firstfree = (object - superblock) / size;
     908      231596 :     ++span->nallocatable;
     909             : 
     910             :     /*
     911             :      * See if the span needs to moved to a different fullness class, or be
     912             :      * freed so its pages can be given back to the segment.
     913             :      */
     914      231596 :     if (span->nallocatable == 1 && span->fclass == DSA_FULLNESS_CLASSES - 1)
     915             :     {
     916             :         /*
     917             :          * The block was completely full and is located in the
     918             :          * highest-numbered fullness class, which is never scanned for free
     919             :          * chunks.  We must move it to the next-lower fullness class.
     920             :          */
     921         346 :         unlink_span(area, span);
     922         346 :         add_span_to_fullness_class(area, span, span_pointer,
     923             :                                    DSA_FULLNESS_CLASSES - 2);
     924             : 
     925             :         /*
     926             :          * If this is the only span, and there is no active span, then we
     927             :          * should probably move this span to fullness class 1.  (Otherwise if
     928             :          * you allocate exactly all the objects in the only span, it moves to
     929             :          * class 3, then you free them all, it moves to 2, and then is given
     930             :          * back, leaving no active span).
     931             :          */
     932             :     }
     933      231250 :     else if (span->nallocatable == span->nmax &&
     934        7664 :              (span->fclass != 1 || span->prevspan != InvalidDsaPointer))
     935             :     {
     936             :         /*
     937             :          * This entire block is free, and it's not the active block for this
     938             :          * size class.  Return the memory to the free page manager. We don't
     939             :          * do this for the active block to prevent hysteresis: if we
     940             :          * repeatedly allocate and free the only chunk in the active block, it
     941             :          * will be very inefficient if we deallocate and reallocate the block
     942             :          * every time.
     943             :          */
     944          18 :         destroy_superblock(area, span_pointer);
     945             :     }
     946             : 
     947      231596 :     LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
     948             : }
     949             : 
     950             : /*
     951             :  * Obtain a backend-local address for a dsa_pointer.  'dp' must point to
     952             :  * memory allocated by the given area (possibly in another process) that
     953             :  * hasn't yet been freed.  This may cause a segment to be mapped into the
     954             :  * current process if required, and may cause freed segments to be unmapped.
     955             :  */
     956             : void *
     957    18121746 : dsa_get_address(dsa_area *area, dsa_pointer dp)
     958             : {
     959             :     dsa_segment_index index;
     960             :     size_t      offset;
     961             : 
     962             :     /* Convert InvalidDsaPointer to NULL. */
     963    18121746 :     if (!DsaPointerIsValid(dp))
     964     2774496 :         return NULL;
     965             : 
     966             :     /* Process any requests to detach from freed segments. */
     967    15347250 :     check_for_freed_segments(area);
     968             : 
     969             :     /* Break the dsa_pointer into its components. */
     970    15347250 :     index = DSA_EXTRACT_SEGMENT_NUMBER(dp);
     971    15347250 :     offset = DSA_EXTRACT_OFFSET(dp);
     972             :     Assert(index < DSA_MAX_SEGMENTS);
     973             : 
     974             :     /* Check if we need to cause this segment to be mapped in. */
     975    15347250 :     if (unlikely(area->segment_maps[index].mapped_address == NULL))
     976             :     {
     977             :         /* Call for effect (we don't need the result). */
     978       37250 :         get_segment_by_index(area, index);
     979             :     }
     980             : 
     981    15347250 :     return area->segment_maps[index].mapped_address + offset;
     982             : }
     983             : 
     984             : /*
     985             :  * Pin this area, so that it will continue to exist even if all backends
     986             :  * detach from it.  In that case, the area can still be reattached to if a
     987             :  * handle has been recorded somewhere.
     988             :  */
     989             : void
     990        2262 : dsa_pin(dsa_area *area)
     991             : {
     992        2262 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
     993        2262 :     if (area->control->pinned)
     994             :     {
     995           0 :         LWLockRelease(DSA_AREA_LOCK(area));
     996           0 :         elog(ERROR, "dsa_area already pinned");
     997             :     }
     998        2262 :     area->control->pinned = true;
     999        2262 :     ++area->control->refcnt;
    1000        2262 :     LWLockRelease(DSA_AREA_LOCK(area));
    1001        2262 : }
    1002             : 
    1003             : /*
    1004             :  * Undo the effects of dsa_pin, so that the given area can be freed when no
    1005             :  * backends are attached to it.  May be called only if dsa_pin has been
    1006             :  * called.
    1007             :  */
    1008             : void
    1009           0 : dsa_unpin(dsa_area *area)
    1010             : {
    1011           0 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1012             :     Assert(area->control->refcnt > 1);
    1013           0 :     if (!area->control->pinned)
    1014             :     {
    1015           0 :         LWLockRelease(DSA_AREA_LOCK(area));
    1016           0 :         elog(ERROR, "dsa_area not pinned");
    1017             :     }
    1018           0 :     area->control->pinned = false;
    1019           0 :     --area->control->refcnt;
    1020           0 :     LWLockRelease(DSA_AREA_LOCK(area));
    1021           0 : }
    1022             : 
    1023             : /*
    1024             :  * Set the total size limit for this area.  This limit is checked whenever new
    1025             :  * segments need to be allocated from the operating system.  If the new size
    1026             :  * limit is already exceeded, this has no immediate effect.
    1027             :  *
    1028             :  * Note that the total virtual memory usage may be temporarily larger than
    1029             :  * this limit when segments have been freed, but not yet detached by all
    1030             :  * backends that have attached to them.
    1031             :  */
    1032             : void
    1033        4268 : dsa_set_size_limit(dsa_area *area, size_t limit)
    1034             : {
    1035        4268 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1036        4268 :     area->control->max_total_segment_size = limit;
    1037        4268 :     LWLockRelease(DSA_AREA_LOCK(area));
    1038        4268 : }
    1039             : 
    1040             : /* Return the total size of all active segments */
    1041             : size_t
    1042        2102 : dsa_get_total_size(dsa_area *area)
    1043             : {
    1044             :     size_t      size;
    1045             : 
    1046        2102 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1047        2102 :     size = area->control->total_segment_size;
    1048        2102 :     LWLockRelease(DSA_AREA_LOCK(area));
    1049             : 
    1050        2102 :     return size;
    1051             : }
    1052             : 
    1053             : /*
    1054             :  * Aggressively free all spare memory in the hope of returning DSM segments to
    1055             :  * the operating system.
    1056             :  */
    1057             : void
    1058           0 : dsa_trim(dsa_area *area)
    1059             : {
    1060             :     int         size_class;
    1061             : 
    1062             :     /*
    1063             :      * Trim in reverse pool order so we get to the spans-of-spans last, just
    1064             :      * in case any become entirely free while processing all the other pools.
    1065             :      */
    1066           0 :     for (size_class = DSA_NUM_SIZE_CLASSES - 1; size_class >= 0; --size_class)
    1067             :     {
    1068           0 :         dsa_area_pool *pool = &area->control->pools[size_class];
    1069             :         dsa_pointer span_pointer;
    1070             : 
    1071           0 :         if (size_class == DSA_SCLASS_SPAN_LARGE)
    1072             :         {
    1073             :             /* Large object frees give back segments aggressively already. */
    1074           0 :             continue;
    1075             :         }
    1076             : 
    1077             :         /*
    1078             :          * Search fullness class 1 only.  That is where we expect to find an
    1079             :          * entirely empty superblock (entirely empty superblocks in other
    1080             :          * fullness classes are returned to the free page map by dsa_free).
    1081             :          */
    1082           0 :         LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
    1083           0 :         span_pointer = pool->spans[1];
    1084           0 :         while (DsaPointerIsValid(span_pointer))
    1085             :         {
    1086           0 :             dsa_area_span *span = dsa_get_address(area, span_pointer);
    1087           0 :             dsa_pointer next = span->nextspan;
    1088             : 
    1089           0 :             if (span->nallocatable == span->nmax)
    1090           0 :                 destroy_superblock(area, span_pointer);
    1091             : 
    1092           0 :             span_pointer = next;
    1093             :         }
    1094           0 :         LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
    1095             :     }
    1096           0 : }
    1097             : 
    1098             : /*
    1099             :  * Print out debugging information about the internal state of the shared
    1100             :  * memory area.
    1101             :  */
    1102             : void
    1103           0 : dsa_dump(dsa_area *area)
    1104             : {
    1105             :     size_t      i,
    1106             :                 j;
    1107             : 
    1108             :     /*
    1109             :      * Note: This gives an inconsistent snapshot as it acquires and releases
    1110             :      * individual locks as it goes...
    1111             :      */
    1112             : 
    1113           0 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1114           0 :     check_for_freed_segments_locked(area);
    1115           0 :     fprintf(stderr, "dsa_area handle %x:\n", area->control->handle);
    1116           0 :     fprintf(stderr, "  max_total_segment_size: %zu\n",
    1117           0 :             area->control->max_total_segment_size);
    1118           0 :     fprintf(stderr, "  total_segment_size: %zu\n",
    1119           0 :             area->control->total_segment_size);
    1120           0 :     fprintf(stderr, "  refcnt: %d\n", area->control->refcnt);
    1121           0 :     fprintf(stderr, "  pinned: %c\n", area->control->pinned ? 't' : 'f');
    1122           0 :     fprintf(stderr, "  segment bins:\n");
    1123           0 :     for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
    1124             :     {
    1125           0 :         if (area->control->segment_bins[i] != DSA_SEGMENT_INDEX_NONE)
    1126             :         {
    1127             :             dsa_segment_index segment_index;
    1128             : 
    1129           0 :             if (i == 0)
    1130           0 :                 fprintf(stderr,
    1131             :                         "    segment bin %zu (no contiguous free pages):\n", i);
    1132             :             else
    1133           0 :                 fprintf(stderr,
    1134             :                         "    segment bin %zu (at least %d contiguous pages free):\n",
    1135           0 :                         i, 1 << (i - 1));
    1136           0 :             segment_index = area->control->segment_bins[i];
    1137           0 :             while (segment_index != DSA_SEGMENT_INDEX_NONE)
    1138             :             {
    1139             :                 dsa_segment_map *segment_map;
    1140             : 
    1141             :                 segment_map =
    1142           0 :                     get_segment_by_index(area, segment_index);
    1143             : 
    1144           0 :                 fprintf(stderr,
    1145             :                         "      segment index %zu, usable_pages = %zu, "
    1146             :                         "contiguous_pages = %zu, mapped at %p\n",
    1147             :                         segment_index,
    1148           0 :                         segment_map->header->usable_pages,
    1149           0 :                         fpm_largest(segment_map->fpm),
    1150             :                         segment_map->mapped_address);
    1151           0 :                 segment_index = segment_map->header->next;
    1152             :             }
    1153             :         }
    1154             :     }
    1155           0 :     LWLockRelease(DSA_AREA_LOCK(area));
    1156             : 
    1157           0 :     fprintf(stderr, "  pools:\n");
    1158           0 :     for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
    1159             :     {
    1160           0 :         bool        found = false;
    1161             : 
    1162           0 :         LWLockAcquire(DSA_SCLASS_LOCK(area, i), LW_EXCLUSIVE);
    1163           0 :         for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
    1164           0 :             if (DsaPointerIsValid(area->control->pools[i].spans[j]))
    1165           0 :                 found = true;
    1166           0 :         if (found)
    1167             :         {
    1168           0 :             if (i == DSA_SCLASS_BLOCK_OF_SPANS)
    1169           0 :                 fprintf(stderr, "    pool for blocks of span objects:\n");
    1170           0 :             else if (i == DSA_SCLASS_SPAN_LARGE)
    1171           0 :                 fprintf(stderr, "    pool for large object spans:\n");
    1172             :             else
    1173           0 :                 fprintf(stderr,
    1174             :                         "    pool for size class %zu (object size %hu bytes):\n",
    1175           0 :                         i, dsa_size_classes[i]);
    1176           0 :             for (j = 0; j < DSA_FULLNESS_CLASSES; ++j)
    1177             :             {
    1178           0 :                 if (!DsaPointerIsValid(area->control->pools[i].spans[j]))
    1179           0 :                     fprintf(stderr, "      fullness class %zu is empty\n", j);
    1180             :                 else
    1181             :                 {
    1182           0 :                     dsa_pointer span_pointer = area->control->pools[i].spans[j];
    1183             : 
    1184           0 :                     fprintf(stderr, "      fullness class %zu:\n", j);
    1185           0 :                     while (DsaPointerIsValid(span_pointer))
    1186             :                     {
    1187             :                         dsa_area_span *span;
    1188             : 
    1189           0 :                         span = dsa_get_address(area, span_pointer);
    1190           0 :                         fprintf(stderr,
    1191             :                                 "        span descriptor at "
    1192             :                                 DSA_POINTER_FORMAT ", superblock at "
    1193             :                                 DSA_POINTER_FORMAT
    1194             :                                 ", pages = %zu, objects free = %hu/%hu\n",
    1195             :                                 span_pointer, span->start, span->npages,
    1196           0 :                                 span->nallocatable, span->nmax);
    1197           0 :                         span_pointer = span->nextspan;
    1198             :                     }
    1199             :                 }
    1200             :             }
    1201             :         }
    1202           0 :         LWLockRelease(DSA_SCLASS_LOCK(area, i));
    1203             :     }
    1204           0 : }
    1205             : 
    1206             : /*
    1207             :  * Return the smallest size that you can successfully provide to
    1208             :  * dsa_create_in_place.
    1209             :  */
    1210             : size_t
    1211        3886 : dsa_minimum_size(void)
    1212             : {
    1213             :     size_t      size;
    1214        3886 :     int         pages = 0;
    1215             : 
    1216        3886 :     size = MAXALIGN(sizeof(dsa_area_control)) +
    1217             :         MAXALIGN(sizeof(FreePageManager));
    1218             : 
    1219             :     /* Figure out how many pages we need, including the page map... */
    1220       11658 :     while (((size + FPM_PAGE_SIZE - 1) / FPM_PAGE_SIZE) > pages)
    1221             :     {
    1222        7772 :         ++pages;
    1223        7772 :         size += sizeof(dsa_pointer);
    1224             :     }
    1225             : 
    1226        3886 :     return pages * FPM_PAGE_SIZE;
    1227             : }
    1228             : 
    1229             : /*
    1230             :  * Workhorse function for dsa_create and dsa_create_in_place.
    1231             :  */
    1232             : static dsa_area *
    1233        3168 : create_internal(void *place, size_t size,
    1234             :                 int tranche_id,
    1235             :                 dsm_handle control_handle,
    1236             :                 dsm_segment *control_segment,
    1237             :                 size_t init_segment_size, size_t max_segment_size)
    1238             : {
    1239             :     dsa_area_control *control;
    1240             :     dsa_area   *area;
    1241             :     dsa_segment_map *segment_map;
    1242             :     size_t      usable_pages;
    1243             :     size_t      total_pages;
    1244             :     size_t      metadata_bytes;
    1245             :     int         i;
    1246             : 
    1247             :     /* Check the initial and maximum block sizes */
    1248             :     Assert(init_segment_size >= DSA_MIN_SEGMENT_SIZE);
    1249             :     Assert(max_segment_size >= init_segment_size);
    1250             :     Assert(max_segment_size <= DSA_MAX_SEGMENT_SIZE);
    1251             : 
    1252             :     /* Sanity check on the space we have to work in. */
    1253        3168 :     if (size < dsa_minimum_size())
    1254           0 :         elog(ERROR, "dsa_area space must be at least %zu, but %zu provided",
    1255             :              dsa_minimum_size(), size);
    1256             : 
    1257             :     /* Now figure out how much space is usable */
    1258        3168 :     total_pages = size / FPM_PAGE_SIZE;
    1259        3168 :     metadata_bytes =
    1260             :         MAXALIGN(sizeof(dsa_area_control)) +
    1261        3168 :         MAXALIGN(sizeof(FreePageManager)) +
    1262             :         total_pages * sizeof(dsa_pointer);
    1263             :     /* Add padding up to next page boundary. */
    1264        3168 :     if (metadata_bytes % FPM_PAGE_SIZE != 0)
    1265        3168 :         metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
    1266             :     Assert(metadata_bytes <= size);
    1267        3168 :     usable_pages = (size - metadata_bytes) / FPM_PAGE_SIZE;
    1268             : 
    1269             :     /*
    1270             :      * Initialize the dsa_area_control object located at the start of the
    1271             :      * space.
    1272             :      */
    1273        3168 :     control = (dsa_area_control *) place;
    1274        3168 :     memset(place, 0, sizeof(*control));
    1275        3168 :     control->segment_header.magic =
    1276        3168 :         DSA_SEGMENT_HEADER_MAGIC ^ control_handle ^ 0;
    1277        3168 :     control->segment_header.next = DSA_SEGMENT_INDEX_NONE;
    1278        3168 :     control->segment_header.prev = DSA_SEGMENT_INDEX_NONE;
    1279        3168 :     control->segment_header.usable_pages = usable_pages;
    1280        3168 :     control->segment_header.freed = false;
    1281        3168 :     control->segment_header.size = size;
    1282        3168 :     control->handle = control_handle;
    1283        3168 :     control->init_segment_size = init_segment_size;
    1284        3168 :     control->max_segment_size = max_segment_size;
    1285        3168 :     control->max_total_segment_size = (size_t) -1;
    1286        3168 :     control->total_segment_size = size;
    1287        3168 :     control->segment_handles[0] = control_handle;
    1288       53856 :     for (i = 0; i < DSA_NUM_SEGMENT_BINS; ++i)
    1289       50688 :         control->segment_bins[i] = DSA_SEGMENT_INDEX_NONE;
    1290        3168 :     control->refcnt = 1;
    1291        3168 :     control->lwlock_tranche_id = tranche_id;
    1292             : 
    1293             :     /*
    1294             :      * Create the dsa_area object that this backend will use to access the
    1295             :      * area.  Other backends will need to obtain their own dsa_area object by
    1296             :      * attaching.
    1297             :      */
    1298        3168 :     area = palloc(sizeof(dsa_area));
    1299        3168 :     area->control = control;
    1300        3168 :     area->resowner = CurrentResourceOwner;
    1301        3168 :     memset(area->segment_maps, 0, sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
    1302        3168 :     area->high_segment_index = 0;
    1303        3168 :     area->freed_segment_counter = 0;
    1304        3168 :     LWLockInitialize(&control->lock, control->lwlock_tranche_id);
    1305      123552 :     for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
    1306      120384 :         LWLockInitialize(DSA_SCLASS_LOCK(area, i),
    1307             :                          control->lwlock_tranche_id);
    1308             : 
    1309             :     /* Set up the segment map for this process's mapping. */
    1310        3168 :     segment_map = &area->segment_maps[0];
    1311        3168 :     segment_map->segment = control_segment;
    1312        3168 :     segment_map->mapped_address = place;
    1313        3168 :     segment_map->header = (dsa_segment_header *) place;
    1314        3168 :     segment_map->fpm = (FreePageManager *)
    1315        3168 :         (segment_map->mapped_address +
    1316             :          MAXALIGN(sizeof(dsa_area_control)));
    1317        3168 :     segment_map->pagemap = (dsa_pointer *)
    1318        3168 :         (segment_map->mapped_address +
    1319        3168 :          MAXALIGN(sizeof(dsa_area_control)) +
    1320             :          MAXALIGN(sizeof(FreePageManager)));
    1321             : 
    1322             :     /* Set up the free page map. */
    1323        3168 :     FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
    1324             :     /* There can be 0 usable pages if size is dsa_minimum_size(). */
    1325             : 
    1326        3168 :     if (usable_pages > 0)
    1327        2450 :         FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
    1328             :                            usable_pages);
    1329             : 
    1330             :     /* Put this segment into the appropriate bin. */
    1331        3168 :     control->segment_bins[contiguous_pages_to_segment_bin(usable_pages)] = 0;
    1332        3168 :     segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
    1333             : 
    1334        3168 :     return area;
    1335             : }
    1336             : 
    1337             : /*
    1338             :  * Workhorse function for dsa_attach and dsa_attach_in_place.
    1339             :  */
    1340             : static dsa_area *
    1341       48030 : attach_internal(void *place, dsm_segment *segment, dsa_handle handle)
    1342             : {
    1343             :     dsa_area_control *control;
    1344             :     dsa_area   *area;
    1345             :     dsa_segment_map *segment_map;
    1346             : 
    1347       48030 :     control = (dsa_area_control *) place;
    1348             :     Assert(control->handle == handle);
    1349             :     Assert(control->segment_handles[0] == handle);
    1350             :     Assert(control->segment_header.magic ==
    1351             :            (DSA_SEGMENT_HEADER_MAGIC ^ handle ^ 0));
    1352             : 
    1353             :     /* Build the backend-local area object. */
    1354       48030 :     area = palloc(sizeof(dsa_area));
    1355       48030 :     area->control = control;
    1356       48030 :     area->resowner = CurrentResourceOwner;
    1357       48030 :     memset(&area->segment_maps[0], 0,
    1358             :            sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
    1359       48030 :     area->high_segment_index = 0;
    1360             : 
    1361             :     /* Set up the segment map for this process's mapping. */
    1362       48030 :     segment_map = &area->segment_maps[0];
    1363       48030 :     segment_map->segment = segment; /* NULL for in-place */
    1364       48030 :     segment_map->mapped_address = place;
    1365       48030 :     segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
    1366       48030 :     segment_map->fpm = (FreePageManager *)
    1367       48030 :         (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)));
    1368       48030 :     segment_map->pagemap = (dsa_pointer *)
    1369       48030 :         (segment_map->mapped_address + MAXALIGN(sizeof(dsa_area_control)) +
    1370             :          MAXALIGN(sizeof(FreePageManager)));
    1371             : 
    1372             :     /* Bump the reference count. */
    1373       48030 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1374       48030 :     if (control->refcnt == 0)
    1375             :     {
    1376             :         /* We can't attach to a DSA area that has already been destroyed. */
    1377           0 :         ereport(ERROR,
    1378             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
    1379             :                  errmsg("could not attach to dynamic shared area")));
    1380             :     }
    1381       48030 :     ++control->refcnt;
    1382       48030 :     area->freed_segment_counter = area->control->freed_segment_counter;
    1383       48030 :     LWLockRelease(DSA_AREA_LOCK(area));
    1384             : 
    1385       48030 :     return area;
    1386             : }
    1387             : 
    1388             : /*
    1389             :  * Add a new span to fullness class 1 of the indicated pool.
    1390             :  */
    1391             : static void
    1392       25906 : init_span(dsa_area *area,
    1393             :           dsa_pointer span_pointer,
    1394             :           dsa_area_pool *pool, dsa_pointer start, size_t npages,
    1395             :           uint16 size_class)
    1396             : {
    1397       25906 :     dsa_area_span *span = dsa_get_address(area, span_pointer);
    1398       25906 :     size_t      obsize = dsa_size_classes[size_class];
    1399             : 
    1400             :     /*
    1401             :      * The per-pool lock must be held because we manipulate the span list for
    1402             :      * this pool.
    1403             :      */
    1404             :     Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
    1405             : 
    1406             :     /* Push this span onto the front of the span list for fullness class 1. */
    1407       25906 :     if (DsaPointerIsValid(pool->spans[1]))
    1408             :     {
    1409             :         dsa_area_span *head = (dsa_area_span *)
    1410        3950 :             dsa_get_address(area, pool->spans[1]);
    1411             : 
    1412        3950 :         head->prevspan = span_pointer;
    1413             :     }
    1414       25906 :     span->pool = DsaAreaPoolToDsaPointer(area, pool);
    1415       25906 :     span->nextspan = pool->spans[1];
    1416       25906 :     span->prevspan = InvalidDsaPointer;
    1417       25906 :     pool->spans[1] = span_pointer;
    1418             : 
    1419       25906 :     span->start = start;
    1420       25906 :     span->npages = npages;
    1421       25906 :     span->size_class = size_class;
    1422       25906 :     span->ninitialized = 0;
    1423       25906 :     if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
    1424             :     {
    1425             :         /*
    1426             :          * A block-of-spans contains its own descriptor, so mark one object as
    1427             :          * initialized and reduce the count of allocatable objects by one.
    1428             :          * Doing this here has the side effect of also reducing nmax by one,
    1429             :          * which is important to make sure we free this object at the correct
    1430             :          * time.
    1431             :          */
    1432        2616 :         span->ninitialized = 1;
    1433        2616 :         span->nallocatable = FPM_PAGE_SIZE / obsize - 1;
    1434             :     }
    1435       23290 :     else if (size_class != DSA_SCLASS_SPAN_LARGE)
    1436       17790 :         span->nallocatable = DSA_SUPERBLOCK_SIZE / obsize;
    1437       25906 :     span->firstfree = DSA_SPAN_NOTHING_FREE;
    1438       25906 :     span->nmax = span->nallocatable;
    1439       25906 :     span->fclass = 1;
    1440       25906 : }
    1441             : 
    1442             : /*
    1443             :  * Transfer the first span in one fullness class to the head of another
    1444             :  * fullness class.
    1445             :  */
    1446             : static bool
    1447       43102 : transfer_first_span(dsa_area *area,
    1448             :                     dsa_area_pool *pool, int fromclass, int toclass)
    1449             : {
    1450             :     dsa_pointer span_pointer;
    1451             :     dsa_area_span *span;
    1452             :     dsa_area_span *nextspan;
    1453             : 
    1454             :     /* Can't do it if source list is empty. */
    1455       43102 :     span_pointer = pool->spans[fromclass];
    1456       43102 :     if (!DsaPointerIsValid(span_pointer))
    1457       40816 :         return false;
    1458             : 
    1459             :     /* Remove span from head of source list. */
    1460        2286 :     span = dsa_get_address(area, span_pointer);
    1461        2286 :     pool->spans[fromclass] = span->nextspan;
    1462        2286 :     if (DsaPointerIsValid(span->nextspan))
    1463             :     {
    1464             :         nextspan = (dsa_area_span *)
    1465         148 :             dsa_get_address(area, span->nextspan);
    1466         148 :         nextspan->prevspan = InvalidDsaPointer;
    1467             :     }
    1468             : 
    1469             :     /* Add span to head of target list. */
    1470        2286 :     span->nextspan = pool->spans[toclass];
    1471        2286 :     pool->spans[toclass] = span_pointer;
    1472        2286 :     if (DsaPointerIsValid(span->nextspan))
    1473             :     {
    1474             :         nextspan = (dsa_area_span *)
    1475         772 :             dsa_get_address(area, span->nextspan);
    1476         772 :         nextspan->prevspan = span_pointer;
    1477             :     }
    1478        2286 :     span->fclass = toclass;
    1479             : 
    1480        2286 :     return true;
    1481             : }
    1482             : 
    1483             : /*
    1484             :  * Allocate one object of the requested size class from the given area.
    1485             :  */
    1486             : static inline dsa_pointer
    1487     1303530 : alloc_object(dsa_area *area, int size_class)
    1488             : {
    1489     1303530 :     dsa_area_pool *pool = &area->control->pools[size_class];
    1490             :     dsa_area_span *span;
    1491             :     dsa_pointer block;
    1492             :     dsa_pointer result;
    1493             :     char       *object;
    1494             :     size_t      size;
    1495             : 
    1496             :     /*
    1497             :      * Even though ensure_active_superblock can in turn call alloc_object if
    1498             :      * it needs to allocate a new span, that's always from a different pool,
    1499             :      * and the order of lock acquisition is always the same, so it's OK that
    1500             :      * we hold this lock for the duration of this function.
    1501             :      */
    1502             :     Assert(!LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
    1503     1303530 :     LWLockAcquire(DSA_SCLASS_LOCK(area, size_class), LW_EXCLUSIVE);
    1504             : 
    1505             :     /*
    1506             :      * If there's no active superblock, we must successfully obtain one or
    1507             :      * fail the request.
    1508             :      */
    1509     1303530 :     if (!DsaPointerIsValid(pool->spans[1]) &&
    1510       20622 :         !ensure_active_superblock(area, pool, size_class))
    1511             :     {
    1512           0 :         result = InvalidDsaPointer;
    1513             :     }
    1514             :     else
    1515             :     {
    1516             :         /*
    1517             :          * There should be a block in fullness class 1 at this point, and it
    1518             :          * should never be completely full.  Thus we can either pop an object
    1519             :          * from the free list or, failing that, initialize a new object.
    1520             :          */
    1521             :         Assert(DsaPointerIsValid(pool->spans[1]));
    1522             :         span = (dsa_area_span *)
    1523     1303530 :             dsa_get_address(area, pool->spans[1]);
    1524             :         Assert(span->nallocatable > 0);
    1525     1303530 :         block = span->start;
    1526             :         Assert(size_class < DSA_NUM_SIZE_CLASSES);
    1527     1303530 :         size = dsa_size_classes[size_class];
    1528     1303530 :         if (span->firstfree != DSA_SPAN_NOTHING_FREE)
    1529             :         {
    1530      193928 :             result = block + span->firstfree * size;
    1531      193928 :             object = dsa_get_address(area, result);
    1532      193928 :             span->firstfree = NextFreeObjectIndex(object);
    1533             :         }
    1534             :         else
    1535             :         {
    1536     1109602 :             result = block + span->ninitialized * size;
    1537     1109602 :             ++span->ninitialized;
    1538             :         }
    1539     1303530 :         --span->nallocatable;
    1540             : 
    1541             :         /* If it's now full, move it to the highest-numbered fullness class. */
    1542     1303530 :         if (span->nallocatable == 0)
    1543        2074 :             transfer_first_span(area, pool, 1, DSA_FULLNESS_CLASSES - 1);
    1544             :     }
    1545             : 
    1546             :     Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
    1547     1303530 :     LWLockRelease(DSA_SCLASS_LOCK(area, size_class));
    1548             : 
    1549     1303530 :     return result;
    1550             : }
    1551             : 
    1552             : /*
    1553             :  * Ensure an active (i.e. fullness class 1) superblock, unless all existing
    1554             :  * superblocks are completely full and no more can be allocated.
    1555             :  *
    1556             :  * Fullness classes K of 0..N are loosely intended to represent blocks whose
    1557             :  * utilization percentage is at least K/N, but we only enforce this rigorously
    1558             :  * for the highest-numbered fullness class, which always contains exactly
    1559             :  * those blocks that are completely full.  It's otherwise acceptable for a
    1560             :  * block to be in a higher-numbered fullness class than the one to which it
    1561             :  * logically belongs.  In addition, the active block, which is always the
    1562             :  * first block in fullness class 1, is permitted to have a higher allocation
    1563             :  * percentage than would normally be allowable for that fullness class; we
    1564             :  * don't move it until it's completely full, and then it goes to the
    1565             :  * highest-numbered fullness class.
    1566             :  *
    1567             :  * It might seem odd that the active block is the head of fullness class 1
    1568             :  * rather than fullness class 0, but experience with other allocators has
    1569             :  * shown that it's usually better to allocate from a block that's moderately
    1570             :  * full rather than one that's nearly empty.  Insofar as is reasonably
    1571             :  * possible, we want to avoid performing new allocations in a block that would
    1572             :  * otherwise become empty soon.
    1573             :  */
    1574             : static bool
    1575       20622 : ensure_active_superblock(dsa_area *area, dsa_area_pool *pool,
    1576             :                          int size_class)
    1577             : {
    1578             :     dsa_pointer span_pointer;
    1579             :     dsa_pointer start_pointer;
    1580       20622 :     size_t      obsize = dsa_size_classes[size_class];
    1581             :     size_t      nmax;
    1582             :     int         fclass;
    1583       20622 :     size_t      npages = 1;
    1584             :     size_t      first_page;
    1585             :     size_t      i;
    1586             :     dsa_segment_map *segment_map;
    1587             : 
    1588             :     Assert(LWLockHeldByMe(DSA_SCLASS_LOCK(area, size_class)));
    1589             : 
    1590             :     /*
    1591             :      * Compute the number of objects that will fit in a block of this size
    1592             :      * class.  Span-of-spans blocks are just a single page, and the first
    1593             :      * object isn't available for use because it describes the block-of-spans
    1594             :      * itself.
    1595             :      */
    1596       20622 :     if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
    1597        2616 :         nmax = FPM_PAGE_SIZE / obsize - 1;
    1598             :     else
    1599       18006 :         nmax = DSA_SUPERBLOCK_SIZE / obsize;
    1600             : 
    1601             :     /*
    1602             :      * If fullness class 1 is empty, try to find a span to put in it by
    1603             :      * scanning higher-numbered fullness classes (excluding the last one,
    1604             :      * whose blocks are certain to all be completely full).
    1605             :      */
    1606       41240 :     for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
    1607             :     {
    1608       20622 :         span_pointer = pool->spans[fclass];
    1609             : 
    1610       21044 :         while (DsaPointerIsValid(span_pointer))
    1611             :         {
    1612             :             int         tfclass;
    1613             :             dsa_area_span *span;
    1614             :             dsa_area_span *nextspan;
    1615             :             dsa_area_span *prevspan;
    1616             :             dsa_pointer next_span_pointer;
    1617             : 
    1618             :             span = (dsa_area_span *)
    1619         422 :                 dsa_get_address(area, span_pointer);
    1620         422 :             next_span_pointer = span->nextspan;
    1621             : 
    1622             :             /* Figure out what fullness class should contain this span. */
    1623         422 :             tfclass = (nmax - span->nallocatable)
    1624         422 :                 * (DSA_FULLNESS_CLASSES - 1) / nmax;
    1625             : 
    1626             :             /* Look up next span. */
    1627         422 :             if (DsaPointerIsValid(span->nextspan))
    1628             :                 nextspan = (dsa_area_span *)
    1629         208 :                     dsa_get_address(area, span->nextspan);
    1630             :             else
    1631         214 :                 nextspan = NULL;
    1632             : 
    1633             :             /*
    1634             :              * If utilization has dropped enough that this now belongs in some
    1635             :              * other fullness class, move it there.
    1636             :              */
    1637         422 :             if (tfclass < fclass)
    1638             :             {
    1639             :                 /* Remove from the current fullness class list. */
    1640           8 :                 if (pool->spans[fclass] == span_pointer)
    1641             :                 {
    1642             :                     /* It was the head; remove it. */
    1643             :                     Assert(!DsaPointerIsValid(span->prevspan));
    1644           8 :                     pool->spans[fclass] = span->nextspan;
    1645           8 :                     if (nextspan != NULL)
    1646           2 :                         nextspan->prevspan = InvalidDsaPointer;
    1647             :                 }
    1648             :                 else
    1649             :                 {
    1650             :                     /* It was not the head. */
    1651             :                     Assert(DsaPointerIsValid(span->prevspan));
    1652             :                     prevspan = (dsa_area_span *)
    1653           0 :                         dsa_get_address(area, span->prevspan);
    1654           0 :                     prevspan->nextspan = span->nextspan;
    1655             :                 }
    1656           8 :                 if (nextspan != NULL)
    1657           2 :                     nextspan->prevspan = span->prevspan;
    1658             : 
    1659             :                 /* Push onto the head of the new fullness class list. */
    1660           8 :                 span->nextspan = pool->spans[tfclass];
    1661           8 :                 pool->spans[tfclass] = span_pointer;
    1662           8 :                 span->prevspan = InvalidDsaPointer;
    1663           8 :                 if (DsaPointerIsValid(span->nextspan))
    1664             :                 {
    1665             :                     nextspan = (dsa_area_span *)
    1666           2 :                         dsa_get_address(area, span->nextspan);
    1667           2 :                     nextspan->prevspan = span_pointer;
    1668             :                 }
    1669           8 :                 span->fclass = tfclass;
    1670             :             }
    1671             : 
    1672             :             /* Advance to next span on list. */
    1673         422 :             span_pointer = next_span_pointer;
    1674             :         }
    1675             : 
    1676             :         /* Stop now if we found a suitable block. */
    1677       20622 :         if (DsaPointerIsValid(pool->spans[1]))
    1678           4 :             return true;
    1679             :     }
    1680             : 
    1681             :     /*
    1682             :      * If there are no blocks that properly belong in fullness class 1, pick
    1683             :      * one from some other fullness class and move it there anyway, so that we
    1684             :      * have an allocation target.  Our last choice is to transfer a block
    1685             :      * that's almost empty (and might become completely empty soon if left
    1686             :      * alone), but even that is better than failing, which is what we must do
    1687             :      * if there are no blocks at all with freespace.
    1688             :      */
    1689             :     Assert(!DsaPointerIsValid(pool->spans[1]));
    1690       41028 :     for (fclass = 2; fclass < DSA_FULLNESS_CLASSES - 1; ++fclass)
    1691       20618 :         if (transfer_first_span(area, pool, fclass, 1))
    1692         208 :             return true;
    1693       40820 :     if (!DsaPointerIsValid(pool->spans[1]) &&
    1694       20410 :         transfer_first_span(area, pool, 0, 1))
    1695           4 :         return true;
    1696             : 
    1697             :     /*
    1698             :      * We failed to find an existing span with free objects, so we need to
    1699             :      * allocate a new superblock and construct a new span to manage it.
    1700             :      *
    1701             :      * First, get a dsa_area_span object to describe the new superblock block
    1702             :      * ... unless this allocation is for a dsa_area_span object, in which case
    1703             :      * that's surely not going to work.  We handle that case by storing the
    1704             :      * span describing a block-of-spans inline.
    1705             :      */
    1706       20406 :     if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
    1707             :     {
    1708       17790 :         span_pointer = alloc_object(area, DSA_SCLASS_BLOCK_OF_SPANS);
    1709       17790 :         if (!DsaPointerIsValid(span_pointer))
    1710           0 :             return false;
    1711       17790 :         npages = DSA_PAGES_PER_SUPERBLOCK;
    1712             :     }
    1713             : 
    1714             :     /* Find or create a segment and allocate the superblock. */
    1715       20406 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1716       20406 :     segment_map = get_best_segment(area, npages);
    1717       20406 :     if (segment_map == NULL)
    1718             :     {
    1719        1988 :         segment_map = make_new_segment(area, npages);
    1720        1988 :         if (segment_map == NULL)
    1721             :         {
    1722           0 :             LWLockRelease(DSA_AREA_LOCK(area));
    1723           0 :             return false;
    1724             :         }
    1725             :     }
    1726             : 
    1727             :     /*
    1728             :      * This shouldn't happen: get_best_segment() or make_new_segment()
    1729             :      * promised that we can successfully allocate npages.
    1730             :      */
    1731       20406 :     if (!FreePageManagerGet(segment_map->fpm, npages, &first_page))
    1732           0 :         elog(FATAL,
    1733             :              "dsa_allocate could not find %zu free pages for superblock",
    1734             :              npages);
    1735       20406 :     LWLockRelease(DSA_AREA_LOCK(area));
    1736             : 
    1737             :     /* Compute the start of the superblock. */
    1738       20406 :     start_pointer =
    1739       20406 :         DSA_MAKE_POINTER(get_segment_index(area, segment_map),
    1740             :                          first_page * FPM_PAGE_SIZE);
    1741             : 
    1742             :     /*
    1743             :      * If this is a block-of-spans, carve the descriptor right out of the
    1744             :      * allocated space.
    1745             :      */
    1746       20406 :     if (size_class == DSA_SCLASS_BLOCK_OF_SPANS)
    1747             :     {
    1748             :         /*
    1749             :          * We have a pointer into the segment.  We need to build a dsa_pointer
    1750             :          * from the segment index and offset into the segment.
    1751             :          */
    1752        2616 :         span_pointer = start_pointer;
    1753             :     }
    1754             : 
    1755             :     /* Initialize span and pagemap. */
    1756       20406 :     init_span(area, span_pointer, pool, start_pointer, npages, size_class);
    1757      307662 :     for (i = 0; i < npages; ++i)
    1758      287256 :         segment_map->pagemap[first_page + i] = span_pointer;
    1759             : 
    1760       20406 :     return true;
    1761             : }
    1762             : 
    1763             : /*
    1764             :  * Return the segment map corresponding to a given segment index, mapping the
    1765             :  * segment in if necessary.  For internal segment book-keeping, this is called
    1766             :  * with the area lock held.  It is also called by dsa_free and dsa_get_address
    1767             :  * without any locking, relying on the fact they have a known live segment
    1768             :  * index and they always call check_for_freed_segments to ensures that any
    1769             :  * freed segment occupying the same slot is detached first.
    1770             :  */
    1771             : static dsa_segment_map *
    1772      298862 : get_segment_by_index(dsa_area *area, dsa_segment_index index)
    1773             : {
    1774      298862 :     if (unlikely(area->segment_maps[index].mapped_address == NULL))
    1775             :     {
    1776             :         dsm_handle  handle;
    1777             :         dsm_segment *segment;
    1778             :         dsa_segment_map *segment_map;
    1779             :         ResourceOwner oldowner;
    1780             : 
    1781             :         /*
    1782             :          * If we are reached by dsa_free or dsa_get_address, there must be at
    1783             :          * least one object allocated in the referenced segment.  Otherwise,
    1784             :          * their caller has a double-free or access-after-free bug, which we
    1785             :          * have no hope of detecting.  So we know it's safe to access this
    1786             :          * array slot without holding a lock; it won't change underneath us.
    1787             :          * Furthermore, we know that we can see the latest contents of the
    1788             :          * slot, as explained in check_for_freed_segments, which those
    1789             :          * functions call before arriving here.
    1790             :          */
    1791       37384 :         handle = area->control->segment_handles[index];
    1792             : 
    1793             :         /* It's an error to try to access an unused slot. */
    1794       37384 :         if (handle == DSM_HANDLE_INVALID)
    1795           0 :             elog(ERROR,
    1796             :                  "dsa_area could not attach to a segment that has been freed");
    1797             : 
    1798       37384 :         oldowner = CurrentResourceOwner;
    1799       37384 :         CurrentResourceOwner = area->resowner;
    1800       37384 :         segment = dsm_attach(handle);
    1801       37384 :         CurrentResourceOwner = oldowner;
    1802       37384 :         if (segment == NULL)
    1803           0 :             elog(ERROR, "dsa_area could not attach to segment");
    1804       37384 :         segment_map = &area->segment_maps[index];
    1805       37384 :         segment_map->segment = segment;
    1806       37384 :         segment_map->mapped_address = dsm_segment_address(segment);
    1807       37384 :         segment_map->header =
    1808       37384 :             (dsa_segment_header *) segment_map->mapped_address;
    1809       37384 :         segment_map->fpm = (FreePageManager *)
    1810       37384 :             (segment_map->mapped_address +
    1811             :              MAXALIGN(sizeof(dsa_segment_header)));
    1812       37384 :         segment_map->pagemap = (dsa_pointer *)
    1813       37384 :             (segment_map->mapped_address +
    1814       37384 :              MAXALIGN(sizeof(dsa_segment_header)) +
    1815             :              MAXALIGN(sizeof(FreePageManager)));
    1816             : 
    1817             :         /* Remember the highest index this backend has ever mapped. */
    1818       37384 :         if (area->high_segment_index < index)
    1819       37144 :             area->high_segment_index = index;
    1820             : 
    1821             :         Assert(segment_map->header->magic ==
    1822             :                (DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ index));
    1823             :     }
    1824             : 
    1825             :     /*
    1826             :      * Callers of dsa_get_address() and dsa_free() don't hold the area lock,
    1827             :      * but it's a bug in the calling code and undefined behavior if the
    1828             :      * address is not live (ie if the segment might possibly have been freed,
    1829             :      * they're trying to use a dangling pointer).
    1830             :      *
    1831             :      * For dsa.c code that holds the area lock to manipulate segment_bins
    1832             :      * lists, it would be a bug if we ever reach a freed segment here.  After
    1833             :      * it's marked as freed, the only thing any backend should do with it is
    1834             :      * unmap it, and it should always have done that in
    1835             :      * check_for_freed_segments_locked() before arriving here to resolve an
    1836             :      * index to a segment_map.
    1837             :      *
    1838             :      * Either way we can assert that we aren't returning a freed segment.
    1839             :      */
    1840             :     Assert(!area->segment_maps[index].header->freed);
    1841             : 
    1842      298862 :     return &area->segment_maps[index];
    1843             : }
    1844             : 
    1845             : /*
    1846             :  * Return a superblock to the free page manager.  If the underlying segment
    1847             :  * has become entirely free, then return it to the operating system.
    1848             :  *
    1849             :  * The appropriate pool lock must be held.
    1850             :  */
    1851             : static void
    1852          18 : destroy_superblock(dsa_area *area, dsa_pointer span_pointer)
    1853             : {
    1854          18 :     dsa_area_span *span = dsa_get_address(area, span_pointer);
    1855          18 :     int         size_class = span->size_class;
    1856             :     dsa_segment_map *segment_map;
    1857             : 
    1858             : 
    1859             :     /* Remove it from its fullness class list. */
    1860          18 :     unlink_span(area, span);
    1861             : 
    1862             :     /*
    1863             :      * Note: Here we acquire the area lock while we already hold a per-pool
    1864             :      * lock.  We never hold the area lock and then take a pool lock, or we
    1865             :      * could deadlock.
    1866             :      */
    1867          18 :     LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    1868          18 :     check_for_freed_segments_locked(area);
    1869             :     segment_map =
    1870          18 :         get_segment_by_index(area, DSA_EXTRACT_SEGMENT_NUMBER(span->start));
    1871          18 :     FreePageManagerPut(segment_map->fpm,
    1872          18 :                        DSA_EXTRACT_OFFSET(span->start) / FPM_PAGE_SIZE,
    1873             :                        span->npages);
    1874             :     /* Check if the segment is now entirely free. */
    1875          18 :     if (fpm_largest(segment_map->fpm) == segment_map->header->usable_pages)
    1876             :     {
    1877           0 :         dsa_segment_index index = get_segment_index(area, segment_map);
    1878             : 
    1879             :         /* If it's not the segment with extra control data, free it. */
    1880           0 :         if (index != 0)
    1881             :         {
    1882             :             /*
    1883             :              * Give it back to the OS, and allow other backends to detect that
    1884             :              * they need to detach.
    1885             :              */
    1886           0 :             unlink_segment(area, segment_map);
    1887           0 :             segment_map->header->freed = true;
    1888             :             Assert(area->control->total_segment_size >=
    1889             :                    segment_map->header->size);
    1890           0 :             area->control->total_segment_size -=
    1891           0 :                 segment_map->header->size;
    1892           0 :             dsm_unpin_segment(dsm_segment_handle(segment_map->segment));
    1893           0 :             dsm_detach(segment_map->segment);
    1894           0 :             area->control->segment_handles[index] = DSM_HANDLE_INVALID;
    1895           0 :             ++area->control->freed_segment_counter;
    1896           0 :             segment_map->segment = NULL;
    1897           0 :             segment_map->header = NULL;
    1898           0 :             segment_map->mapped_address = NULL;
    1899             :         }
    1900             :     }
    1901             : 
    1902             :     /* Move segment to appropriate bin if necessary. */
    1903          18 :     if (segment_map->header != NULL)
    1904          18 :         rebin_segment(area, segment_map);
    1905             : 
    1906          18 :     LWLockRelease(DSA_AREA_LOCK(area));
    1907             : 
    1908             :     /*
    1909             :      * Span-of-spans blocks store the span which describes them within the
    1910             :      * block itself, so freeing the storage implicitly frees the descriptor
    1911             :      * also.  If this is a block of any other type, we need to separately free
    1912             :      * the span object also.  This recursive call to dsa_free will acquire the
    1913             :      * span pool's lock.  We can't deadlock because the acquisition order is
    1914             :      * always some other pool and then the span pool.
    1915             :      */
    1916          18 :     if (size_class != DSA_SCLASS_BLOCK_OF_SPANS)
    1917          18 :         dsa_free(area, span_pointer);
    1918          18 : }
    1919             : 
    1920             : static void
    1921        4590 : unlink_span(dsa_area *area, dsa_area_span *span)
    1922             : {
    1923        4590 :     if (DsaPointerIsValid(span->nextspan))
    1924             :     {
    1925        3700 :         dsa_area_span *next = dsa_get_address(area, span->nextspan);
    1926             : 
    1927        3700 :         next->prevspan = span->prevspan;
    1928             :     }
    1929        4590 :     if (DsaPointerIsValid(span->prevspan))
    1930             :     {
    1931        2212 :         dsa_area_span *prev = dsa_get_address(area, span->prevspan);
    1932             : 
    1933        2212 :         prev->nextspan = span->nextspan;
    1934             :     }
    1935             :     else
    1936             :     {
    1937        2378 :         dsa_area_pool *pool = dsa_get_address(area, span->pool);
    1938             : 
    1939        2378 :         pool->spans[span->fclass] = span->nextspan;
    1940             :     }
    1941        4590 : }
    1942             : 
    1943             : static void
    1944         346 : add_span_to_fullness_class(dsa_area *area, dsa_area_span *span,
    1945             :                            dsa_pointer span_pointer,
    1946             :                            int fclass)
    1947             : {
    1948         346 :     dsa_area_pool *pool = dsa_get_address(area, span->pool);
    1949             : 
    1950         346 :     if (DsaPointerIsValid(pool->spans[fclass]))
    1951             :     {
    1952         178 :         dsa_area_span *head = dsa_get_address(area,
    1953             :                                               pool->spans[fclass]);
    1954             : 
    1955         178 :         head->prevspan = span_pointer;
    1956             :     }
    1957         346 :     span->prevspan = InvalidDsaPointer;
    1958         346 :     span->nextspan = pool->spans[fclass];
    1959         346 :     pool->spans[fclass] = span_pointer;
    1960         346 :     span->fclass = fclass;
    1961         346 : }
    1962             : 
    1963             : /*
    1964             :  * Detach from an area that was either created or attached to by this process.
    1965             :  */
    1966             : void
    1967       50610 : dsa_detach(dsa_area *area)
    1968             : {
    1969             :     int         i;
    1970             : 
    1971             :     /* Detach from all segments. */
    1972      140624 :     for (i = 0; i <= area->high_segment_index; ++i)
    1973       90014 :         if (area->segment_maps[i].segment != NULL)
    1974       39474 :             dsm_detach(area->segment_maps[i].segment);
    1975             : 
    1976             :     /*
    1977             :      * Note that 'detaching' (= detaching from DSM segments) doesn't include
    1978             :      * 'releasing' (= adjusting the reference count).  It would be nice to
    1979             :      * combine these operations, but client code might never get around to
    1980             :      * calling dsa_detach because of an error path, and a detach hook on any
    1981             :      * particular segment is too late to detach other segments in the area
    1982             :      * without risking a 'leak' warning in the non-error path.
    1983             :      */
    1984             : 
    1985             :     /* Free the backend-local area object. */
    1986       50610 :     pfree(area);
    1987       50610 : }
    1988             : 
    1989             : /*
    1990             :  * Unlink a segment from the bin that contains it.
    1991             :  */
    1992             : static void
    1993        4540 : unlink_segment(dsa_area *area, dsa_segment_map *segment_map)
    1994             : {
    1995        4540 :     if (segment_map->header->prev != DSA_SEGMENT_INDEX_NONE)
    1996             :     {
    1997             :         dsa_segment_map *prev;
    1998             : 
    1999           2 :         prev = get_segment_by_index(area, segment_map->header->prev);
    2000           2 :         prev->header->next = segment_map->header->next;
    2001             :     }
    2002             :     else
    2003             :     {
    2004             :         Assert(area->control->segment_bins[segment_map->header->bin] ==
    2005             :                get_segment_index(area, segment_map));
    2006        4538 :         area->control->segment_bins[segment_map->header->bin] =
    2007        4538 :             segment_map->header->next;
    2008             :     }
    2009        4540 :     if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
    2010             :     {
    2011             :         dsa_segment_map *next;
    2012             : 
    2013           0 :         next = get_segment_by_index(area, segment_map->header->next);
    2014           0 :         next->header->prev = segment_map->header->prev;
    2015             :     }
    2016        4540 : }
    2017             : 
    2018             : /*
    2019             :  * Find a segment that could satisfy a request for 'npages' of contiguous
    2020             :  * memory, or return NULL if none can be found.  This may involve attaching to
    2021             :  * segments that weren't previously attached so that we can query their free
    2022             :  * pages map.
    2023             :  */
    2024             : static dsa_segment_map *
    2025       25906 : get_best_segment(dsa_area *area, size_t npages)
    2026             : {
    2027             :     size_t      bin;
    2028             : 
    2029             :     Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
    2030       25906 :     check_for_freed_segments_locked(area);
    2031             : 
    2032             :     /*
    2033             :      * Start searching from the first bin that *might* have enough contiguous
    2034             :      * pages.
    2035             :      */
    2036       25906 :     for (bin = contiguous_pages_to_segment_bin(npages);
    2037      112788 :          bin < DSA_NUM_SEGMENT_BINS;
    2038       86882 :          ++bin)
    2039             :     {
    2040             :         /*
    2041             :          * The minimum contiguous size that any segment in this bin should
    2042             :          * have.  We'll re-bin if we see segments with fewer.
    2043             :          */
    2044      110754 :         size_t      threshold = (size_t) 1 << (bin - 1);
    2045             :         dsa_segment_index segment_index;
    2046             : 
    2047             :         /* Search this bin for a segment with enough contiguous space. */
    2048      110754 :         segment_index = area->control->segment_bins[bin];
    2049      112626 :         while (segment_index != DSA_SEGMENT_INDEX_NONE)
    2050             :         {
    2051             :             dsa_segment_map *segment_map;
    2052             :             dsa_segment_index next_segment_index;
    2053             :             size_t      contiguous_pages;
    2054             : 
    2055       25744 :             segment_map = get_segment_by_index(area, segment_index);
    2056       25744 :             next_segment_index = segment_map->header->next;
    2057       25744 :             contiguous_pages = fpm_largest(segment_map->fpm);
    2058             : 
    2059             :             /* Not enough for the request, still enough for this bin. */
    2060       25744 :             if (contiguous_pages >= threshold && contiguous_pages < npages)
    2061             :             {
    2062           0 :                 segment_index = next_segment_index;
    2063           0 :                 continue;
    2064             :             }
    2065             : 
    2066             :             /* Re-bin it if it's no longer in the appropriate bin. */
    2067       25744 :             if (contiguous_pages < threshold)
    2068             :             {
    2069        4160 :                 rebin_segment(area, segment_map);
    2070             : 
    2071             :                 /*
    2072             :                  * But fall through to see if it's enough to satisfy this
    2073             :                  * request anyway....
    2074             :                  */
    2075             :             }
    2076             : 
    2077             :             /* Check if we are done. */
    2078       25744 :             if (contiguous_pages >= npages)
    2079       23872 :                 return segment_map;
    2080             : 
    2081             :             /* Continue searching the same bin. */
    2082        1872 :             segment_index = next_segment_index;
    2083             :         }
    2084             :     }
    2085             : 
    2086             :     /* Not found. */
    2087        2034 :     return NULL;
    2088             : }
    2089             : 
    2090             : /*
    2091             :  * Create a new segment that can handle at least requested_pages.  Returns
    2092             :  * NULL if the requested total size limit or maximum allowed number of
    2093             :  * segments would be exceeded.
    2094             :  */
    2095             : static dsa_segment_map *
    2096        2034 : make_new_segment(dsa_area *area, size_t requested_pages)
    2097             : {
    2098             :     dsa_segment_index new_index;
    2099             :     size_t      metadata_bytes;
    2100             :     size_t      total_size;
    2101             :     size_t      total_pages;
    2102             :     size_t      usable_pages;
    2103             :     dsa_segment_map *segment_map;
    2104             :     dsm_segment *segment;
    2105             :     ResourceOwner oldowner;
    2106             : 
    2107             :     Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
    2108             : 
    2109             :     /* Find a segment slot that is not in use (linearly for now). */
    2110        2114 :     for (new_index = 1; new_index < DSA_MAX_SEGMENTS; ++new_index)
    2111             :     {
    2112        2114 :         if (area->control->segment_handles[new_index] == DSM_HANDLE_INVALID)
    2113        2034 :             break;
    2114             :     }
    2115        2034 :     if (new_index == DSA_MAX_SEGMENTS)
    2116           0 :         return NULL;
    2117             : 
    2118             :     /*
    2119             :      * If the total size limit is already exceeded, then we exit early and
    2120             :      * avoid arithmetic wraparound in the unsigned expressions below.
    2121             :      */
    2122        2034 :     if (area->control->total_segment_size >=
    2123        2034 :         area->control->max_total_segment_size)
    2124           0 :         return NULL;
    2125             : 
    2126             :     /*
    2127             :      * The size should be at least as big as requested, and at least big
    2128             :      * enough to follow a geometric series that approximately doubles the
    2129             :      * total storage each time we create a new segment.  We use geometric
    2130             :      * growth because the underlying DSM system isn't designed for large
    2131             :      * numbers of segments (otherwise we might even consider just using one
    2132             :      * DSM segment for each large allocation and for each superblock, and then
    2133             :      * we wouldn't need to use FreePageManager).
    2134             :      *
    2135             :      * We decide on a total segment size first, so that we produce tidy
    2136             :      * power-of-two sized segments.  This is a good property to have if we
    2137             :      * move to huge pages in the future.  Then we work back to the number of
    2138             :      * pages we can fit.
    2139             :      */
    2140        2034 :     total_size = area->control->init_segment_size *
    2141        2034 :         ((size_t) 1 << (new_index / DSA_NUM_SEGMENTS_AT_EACH_SIZE));
    2142        2034 :     total_size = Min(total_size, area->control->max_segment_size);
    2143        2034 :     total_size = Min(total_size,
    2144             :                      area->control->max_total_segment_size -
    2145             :                      area->control->total_segment_size);
    2146             : 
    2147        2034 :     total_pages = total_size / FPM_PAGE_SIZE;
    2148        2034 :     metadata_bytes =
    2149             :         MAXALIGN(sizeof(dsa_segment_header)) +
    2150        2034 :         MAXALIGN(sizeof(FreePageManager)) +
    2151             :         sizeof(dsa_pointer) * total_pages;
    2152             : 
    2153             :     /* Add padding up to next page boundary. */
    2154        2034 :     if (metadata_bytes % FPM_PAGE_SIZE != 0)
    2155        2034 :         metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
    2156        2034 :     if (total_size <= metadata_bytes)
    2157           0 :         return NULL;
    2158        2034 :     usable_pages = (total_size - metadata_bytes) / FPM_PAGE_SIZE;
    2159             :     Assert(metadata_bytes + usable_pages * FPM_PAGE_SIZE <= total_size);
    2160             : 
    2161             :     /* See if that is enough... */
    2162        2034 :     if (requested_pages > usable_pages)
    2163             :     {
    2164             :         /*
    2165             :          * We'll make an odd-sized segment, working forward from the requested
    2166             :          * number of pages.
    2167             :          */
    2168           0 :         usable_pages = requested_pages;
    2169           0 :         metadata_bytes =
    2170             :             MAXALIGN(sizeof(dsa_segment_header)) +
    2171           0 :             MAXALIGN(sizeof(FreePageManager)) +
    2172             :             usable_pages * sizeof(dsa_pointer);
    2173             : 
    2174             :         /* Add padding up to next page boundary. */
    2175           0 :         if (metadata_bytes % FPM_PAGE_SIZE != 0)
    2176           0 :             metadata_bytes += FPM_PAGE_SIZE - (metadata_bytes % FPM_PAGE_SIZE);
    2177           0 :         total_size = metadata_bytes + usable_pages * FPM_PAGE_SIZE;
    2178             : 
    2179             :         /* Is that too large for dsa_pointer's addressing scheme? */
    2180           0 :         if (total_size > DSA_MAX_SEGMENT_SIZE)
    2181           0 :             return NULL;
    2182             : 
    2183             :         /* Would that exceed the limit? */
    2184           0 :         if (total_size > area->control->max_total_segment_size -
    2185           0 :             area->control->total_segment_size)
    2186           0 :             return NULL;
    2187             :     }
    2188             : 
    2189             :     /* Create the segment. */
    2190        2034 :     oldowner = CurrentResourceOwner;
    2191        2034 :     CurrentResourceOwner = area->resowner;
    2192        2034 :     segment = dsm_create(total_size, 0);
    2193        2034 :     CurrentResourceOwner = oldowner;
    2194        2034 :     if (segment == NULL)
    2195           0 :         return NULL;
    2196        2034 :     dsm_pin_segment(segment);
    2197             : 
    2198             :     /* Store the handle in shared memory to be found by index. */
    2199        4068 :     area->control->segment_handles[new_index] =
    2200        2034 :         dsm_segment_handle(segment);
    2201             :     /* Track the highest segment index in the history of the area. */
    2202        2034 :     if (area->control->high_segment_index < new_index)
    2203        2034 :         area->control->high_segment_index = new_index;
    2204             :     /* Track the highest segment index this backend has ever mapped. */
    2205        2034 :     if (area->high_segment_index < new_index)
    2206        2034 :         area->high_segment_index = new_index;
    2207             :     /* Track total size of all segments. */
    2208        2034 :     area->control->total_segment_size += total_size;
    2209             :     Assert(area->control->total_segment_size <=
    2210             :            area->control->max_total_segment_size);
    2211             : 
    2212             :     /* Build a segment map for this segment in this backend. */
    2213        2034 :     segment_map = &area->segment_maps[new_index];
    2214        2034 :     segment_map->segment = segment;
    2215        2034 :     segment_map->mapped_address = dsm_segment_address(segment);
    2216        2034 :     segment_map->header = (dsa_segment_header *) segment_map->mapped_address;
    2217        2034 :     segment_map->fpm = (FreePageManager *)
    2218        2034 :         (segment_map->mapped_address +
    2219             :          MAXALIGN(sizeof(dsa_segment_header)));
    2220        2034 :     segment_map->pagemap = (dsa_pointer *)
    2221        2034 :         (segment_map->mapped_address +
    2222        2034 :          MAXALIGN(sizeof(dsa_segment_header)) +
    2223             :          MAXALIGN(sizeof(FreePageManager)));
    2224             : 
    2225             :     /* Set up the free page map. */
    2226        2034 :     FreePageManagerInitialize(segment_map->fpm, segment_map->mapped_address);
    2227        2034 :     FreePageManagerPut(segment_map->fpm, metadata_bytes / FPM_PAGE_SIZE,
    2228             :                        usable_pages);
    2229             : 
    2230             :     /* Set up the segment header and put it in the appropriate bin. */
    2231        2034 :     segment_map->header->magic =
    2232        2034 :         DSA_SEGMENT_HEADER_MAGIC ^ area->control->handle ^ new_index;
    2233        2034 :     segment_map->header->usable_pages = usable_pages;
    2234        2034 :     segment_map->header->size = total_size;
    2235        2034 :     segment_map->header->bin = contiguous_pages_to_segment_bin(usable_pages);
    2236        2034 :     segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
    2237        2034 :     segment_map->header->next =
    2238        2034 :         area->control->segment_bins[segment_map->header->bin];
    2239        2034 :     segment_map->header->freed = false;
    2240        2034 :     area->control->segment_bins[segment_map->header->bin] = new_index;
    2241        2034 :     if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
    2242             :     {
    2243             :         dsa_segment_map *next =
    2244           0 :             get_segment_by_index(area, segment_map->header->next);
    2245             : 
    2246             :         Assert(next->header->bin == segment_map->header->bin);
    2247           0 :         next->header->prev = new_index;
    2248             :     }
    2249             : 
    2250        2034 :     return segment_map;
    2251             : }
    2252             : 
    2253             : /*
    2254             :  * Check if any segments have been freed by destroy_superblock, so we can
    2255             :  * detach from them in this backend.  This function is called by
    2256             :  * dsa_get_address and dsa_free to make sure that a dsa_pointer they have
    2257             :  * received can be resolved to the correct segment.
    2258             :  *
    2259             :  * The danger we want to defend against is that there could be an old segment
    2260             :  * mapped into a given slot in this backend, and the dsa_pointer they have
    2261             :  * might refer to some new segment in the same slot.  So those functions must
    2262             :  * be sure to process all instructions to detach from a freed segment that had
    2263             :  * been generated by the time this process received the dsa_pointer, before
    2264             :  * they call get_segment_by_index.
    2265             :  */
    2266             : static void
    2267    15583072 : check_for_freed_segments(dsa_area *area)
    2268             : {
    2269             :     size_t      freed_segment_counter;
    2270             : 
    2271             :     /*
    2272             :      * Any other process that has freed a segment has incremented
    2273             :      * freed_segment_counter while holding an LWLock, and that must precede
    2274             :      * any backend creating a new segment in the same slot while holding an
    2275             :      * LWLock, and that must precede the creation of any dsa_pointer pointing
    2276             :      * into the new segment which might reach us here, and the caller must
    2277             :      * have sent the dsa_pointer to this process using appropriate memory
    2278             :      * synchronization (some kind of locking or atomic primitive or system
    2279             :      * call).  So all we need to do on the reading side is ask for the load of
    2280             :      * freed_segment_counter to follow the caller's load of the dsa_pointer it
    2281             :      * has, and we can be sure to detect any segments that had been freed as
    2282             :      * of the time that the dsa_pointer reached this process.
    2283             :      */
    2284    15583072 :     pg_read_barrier();
    2285    15583072 :     freed_segment_counter = area->control->freed_segment_counter;
    2286    15583072 :     if (unlikely(area->freed_segment_counter != freed_segment_counter))
    2287             :     {
    2288             :         /* Check all currently mapped segments to find what's been freed. */
    2289           0 :         LWLockAcquire(DSA_AREA_LOCK(area), LW_EXCLUSIVE);
    2290           0 :         check_for_freed_segments_locked(area);
    2291           0 :         LWLockRelease(DSA_AREA_LOCK(area));
    2292             :     }
    2293    15583072 : }
    2294             : 
    2295             : /*
    2296             :  * Workhorse for check_for_freed_segments(), and also used directly in path
    2297             :  * where the area lock is already held.  This should be called after acquiring
    2298             :  * the lock but before looking up any segment by index number, to make sure we
    2299             :  * unmap any stale segments that might have previously had the same index as a
    2300             :  * current segment.
    2301             :  */
    2302             : static void
    2303       25924 : check_for_freed_segments_locked(dsa_area *area)
    2304             : {
    2305             :     size_t      freed_segment_counter;
    2306             :     int         i;
    2307             : 
    2308             :     Assert(LWLockHeldByMe(DSA_AREA_LOCK(area)));
    2309       25924 :     freed_segment_counter = area->control->freed_segment_counter;
    2310       25924 :     if (unlikely(area->freed_segment_counter != freed_segment_counter))
    2311             :     {
    2312           0 :         for (i = 0; i <= area->high_segment_index; ++i)
    2313             :         {
    2314           0 :             if (area->segment_maps[i].header != NULL &&
    2315           0 :                 area->segment_maps[i].header->freed)
    2316             :             {
    2317           0 :                 dsm_detach(area->segment_maps[i].segment);
    2318           0 :                 area->segment_maps[i].segment = NULL;
    2319           0 :                 area->segment_maps[i].header = NULL;
    2320           0 :                 area->segment_maps[i].mapped_address = NULL;
    2321             :             }
    2322             :         }
    2323           0 :         area->freed_segment_counter = freed_segment_counter;
    2324             :     }
    2325       25924 : }
    2326             : 
    2327             : /*
    2328             :  * Re-bin segment if it's no longer in the appropriate bin.
    2329             :  */
    2330             : static void
    2331        8404 : rebin_segment(dsa_area *area, dsa_segment_map *segment_map)
    2332             : {
    2333             :     size_t      new_bin;
    2334             :     dsa_segment_index segment_index;
    2335             : 
    2336        8404 :     new_bin = contiguous_pages_to_segment_bin(fpm_largest(segment_map->fpm));
    2337        8404 :     if (segment_map->header->bin == new_bin)
    2338        3864 :         return;
    2339             : 
    2340             :     /* Remove it from its current bin. */
    2341        4540 :     unlink_segment(area, segment_map);
    2342             : 
    2343             :     /* Push it onto the front of its new bin. */
    2344        4540 :     segment_index = get_segment_index(area, segment_map);
    2345        4540 :     segment_map->header->prev = DSA_SEGMENT_INDEX_NONE;
    2346        4540 :     segment_map->header->next = area->control->segment_bins[new_bin];
    2347        4540 :     segment_map->header->bin = new_bin;
    2348        4540 :     area->control->segment_bins[new_bin] = segment_index;
    2349        4540 :     if (segment_map->header->next != DSA_SEGMENT_INDEX_NONE)
    2350             :     {
    2351             :         dsa_segment_map *next;
    2352             : 
    2353          26 :         next = get_segment_by_index(area, segment_map->header->next);
    2354             :         Assert(next->header->bin == new_bin);
    2355          26 :         next->header->prev = segment_index;
    2356             :     }
    2357             : }

Generated by: LCOV version 1.16