/**
 * @file   veil_shmem.c
 * \code
 *     Author:       Marc Munro
 *     Copyright (c) 2005 - 2018 Marc Munro
 *     License:      BSD
 *
 * \endcode
 * @brief
 * Functions for dealing with veil shared memory.
 *
 * This provides dynamic memory allocation, like malloc, from chunks of
 * shared memory allocated from the Postgres shared memory pool.  In
 * order to be able to reset and reload shared memory structures while
 * other backends continue to use the existing structures, a shared
 * memory reset creates a new context, or switches to an existing one
 * that is no longer in use.  No more than two separate contexts will be
 * created.
 *
 * Each context of veil shared memory is associated with a shared hash,
 * which is used to store veil's shared variables.  A specially named
 * variable, VEIL_SHMEMCTL appears only in context0 and contains a
 * reference to chunk0, and the ShmemCtl structure.  From this structure
 * we can identify the current context, the initial chunks for each
 * active context, and whether a context switch is in progress.
 *
 * A context switch takes place in 3 steps:
 * - preparation, in which we determine if a context switch is allowed,
 *   initialise the new context and record the fact that we are in the
 *   process of switching.  All subsequent operations in the current
 *   backend will work in the new context, while other backends will
 *   continue to use the original context
 * - initialisation of the new context, variables, etc.  This is done
 *   by the user-space function veil_init().
 * - switchover, when all other processes gain access to the newly
 *   initialised context.  They may continue to use the previous
 *   context for the duration of their current transactions.
 *
 * To access shared variable "x" in a new session, the following steps
 * are taken:
 *  - We access the hash "VEIL_SHARED1_nnn" (where nnn is the oid of our
 *    database).  This gives us a reference to the ShmemCtl structure.
 *    We record hash0 and shared_meminfo on the way.
 *  - We access ShmemCtl to identify the current hash and current
 *    context.
 *  - We look up variable "x" in the current hash, and if we have to
 *    allocate space for it, allocate it from the current context.
 *
 * Note that we use a dynamically allocated LWLock, VeilLWLock, to
 * protect our shared control structures.
 *
 */

#include "postgres.h"
#include "utils/hsearch.h"
#include "storage/pg_shmem.h"
#include "storage/shmem.h"
#include "storage/lwlock.h"
#include "storage/procarray.h"
#include "access/xact.h"
#include "access/transam.h"
#include "miscadmin.h"
#include "veil_version.h"
#include "veil_shmem.h"
#include "veil_funcs.h"

/**
 * shared_meminfo provides access to the ShmemCtl structure allocated in
 * context 0.
 */
static ShmemCtl *shared_meminfo = NULL;

/**
 * Whether the current backend is in the process of switching contexts.
 * If so, it will be setting up the non-current context in readiness for
 * making it available to all other backends.
 */
static bool prepared_for_switch = false;

/**
 * The LWLock that Veil will use for managing concurrent access to
 * shared memory.  It is set to AddinShmemInitLock in _PG_init() and at
 * the start of shmalloc_init(), and is then replaced by the
 * database-specific lock once shared memory has been attached.
 */
static LWLockId VeilLWLock = 0;

/**
 * The LWLock to be used while initially setting up shared memory and
 * allocating a veil database-specific LWLock.  Initialised in
 * _PG_init().
 */
static LWLockId InitialLWLock = 0;

/**
 * Return the index of the other context from the one supplied.
 * The argument is parenthesised so that compound expressions (for
 * example OTHER_CONTEXT(a & 1)) expand as the caller intends.
 *
 * @param x the context for which we want the other one.
 *
 * @return the opposite context to that of x.
 */
#define OTHER_CONTEXT(x)    ((x) ? 0 : 1)

/**
 * The MemContext that we use to manage our tranche of LWLocks.
 * This is a per-backend pointer; it stays NULL until this backend has
 * located the managing context.
 */
static MemContext *lwlock_context = NULL;

/**
 * Name of tranche of LWLocks used by veil.
 */
static char *TRANCHE_NAME = "veil";

/**
 * Return the next LWLock from our tranche.
 * Note that locking is the responsibility of the caller.
*/ static LWLock * NextLWLock() { // TODO: Ensure we don't exceed the number of locks in our tranche if (lwlock_context->lwlock_idx > 0) { lwlock_context->lwlock_idx--; } else { // Error ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("veil: out of LWLocks"))); } return &(lwlock_context->lwlock_tranche[lwlock_context->lwlock_idx].lock); } /** * Veil's startup function. This should be run when the Veil shared * library is loaded by postgres. * */ void _PG_init() { int veil_dbs; /* See definitions of the following two variables, for comments. */ VeilLWLock = AddinShmemInitLock; InitialLWLock = AddinShmemInitLock; /* Define GUCs for veil */ veil_config_init(); veil_dbs = veil_dbs_in_cluster(); /* Request a Veil-specific shared memory context */ RequestAddinShmemSpace(2 * veil_shmem_context_size() * veil_dbs); /* Request LWLocks for later use by all backends */ RequestNamedLWLockTranche(TRANCHE_NAME, veil_dbs); } /** * Create/attach to the shared hash identified by hashname. Return a * pointer to an HTAB that references the shared hash. All locking is * handled by the caller. * * @param hashname * * @return Pointer to HTAB referencing the shared hash. */ static HTAB * create_shared_hash(const char *hashname) { HASHCTL hashctl; HTAB *result; char *db_hashname; int hash_elems = veil_shared_hash_elems(); /* Add the current database oid into the hashname so that it is * distinct from the shared hash for other databases in the * cluster. */ db_hashname = (char *) vl_malloc(HASH_KEYLEN); (void) snprintf(db_hashname, HASH_KEYLEN - 1, "%s_%u", hashname, MyDatabaseId); hashctl.keysize = HASH_KEYLEN; hashctl.entrysize = sizeof(VarEntry); result = ShmemInitHash(db_hashname, hash_elems, hash_elems, &hashctl, HASH_ELEM); pfree(db_hashname); return result; } /** * Return reference to the HTAB for the shared hash associated with * context 0. * * @return Pointer to HTAB referencing shared hash for context 0. 
*/ static HTAB * get_hash0() { static HTAB *hash0 = NULL; if (!hash0) { hash0 = create_shared_hash("VEIL_SHARED1"); } return hash0; } /** * Return reference to the HTAB for the shared hash associated with * context 1. * * @return Pointer to HTAB referencing shared hash for context 1. */ static HTAB * get_hash1() { static HTAB *hash1 = NULL; if (!hash1) { hash1 = create_shared_hash("VEIL_SHARED2"); } return hash1; } /** * Allocate or attach to, a new chunk of shared memory for a named * memory context. * * @param name The name * @param size The size of the shared memory chunk to be allocated. * @param p_found Pointer to boolean that will identify whether this * chunk has already been initialised. * * @return Pointer to chunk of shared memory. */ static MemContext * get_shmem_context(char *name, size_t size, bool *p_found) { int i; MemContext *context; char *uniqname = (char *) vl_malloc(strlen(name) + 16); int max_dbs = veil_dbs_in_cluster(); for (i = 0; i < max_dbs; i++) { (void) sprintf(uniqname, "%s_%d", name, i); context = ShmemInitStruct(uniqname, size, p_found);; if (!context) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("veil: cannot allocate shared memory(1)"))); } if (*p_found) { /* Already exists. Check database id. */ if (context->db_id == MyDatabaseId) { /* This context is the one for the current database, * nothing else to do. */ return context; } } else { /* We Just allocated our first context */ context->db_id = MyDatabaseId; context->next = sizeof(MemContext); context->limit = size; context->lwlock = VeilLWLock; if (i == 0) { /* This context is the very first MemContext for the * cluster: this is the one used to manage our LWLocks * tranche. */ context->lwlock_tranche = GetNamedLWLockTranche(TRANCHE_NAME); context->lwlock_idx = max_dbs; lwlock_context = context; } return context; } } /* We reach this point if no existing contexts are allocated to our * database. Now we check those existing contexts to see whether * they are still in use. 
If not, we will redeploy them. */ for (i = 0; i < max_dbs; i++) { (void) sprintf(uniqname, "%s_%d", name, i); context = ShmemInitStruct(uniqname, size, p_found);; if (!context) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("veil: cannot allocate shared memory(2)"))); } if (*p_found) { /* Is this context for a still existant database? */ if (!vl_db_exists(context->db_id)) { /* We can re-use this context. */ context->db_id = MyDatabaseId; context->next = sizeof(MemContext); context->limit = size; *p_found = false; /* Tell the caller that init is * required */ return context; } } else { /* We didn't find an unused context, so now we have created * a new one. */ context->db_id = MyDatabaseId; context->next = sizeof(MemContext); context->limit = size; return context; } } ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("veil: no more shared memory contexts allowed"))); return NULL; } /* Forward ref, required by next function. */ static void shmalloc_init(void); /** * Return the id (index) of the current context for this session * * @return The current context id */ static int get_cur_context_id() { static bool initialised = false; int context; if (!initialised) { shmalloc_init(); initialised = true; } context = shared_meminfo->current_context; if (prepared_for_switch) { context = OTHER_CONTEXT(context); } else { /* Check for the default context being for a later transaction * than current and, if so, use the other one. */ if (TransactionIdPrecedes(GetCurrentTransactionId(), shared_meminfo->xid[context])) { context = OTHER_CONTEXT(context); } } return context; } /** * Return pointer to shared memory allocated for the current context. * * @return The current context. */ static MemContext * get_cur_context() { int context; context = get_cur_context_id(); return shared_meminfo->context[context]; } /** * Dynamically allocate a piece of shared memory from the current * context, doing no locking. 
* * @param context The context in which we are operating * @param size The size of the requested piece of memory. * * @return Pointer to dynamically allocated memory. */ static void * do_vl_shmalloc(MemContext *context, size_t size) { void *result = NULL; size_t amount = (size_t) MAXALIGN(size); if ((amount + context->next) <= context->limit) { result = (void *) ((char *) context + context->next); context->next += amount; } else { ereport(ERROR, (ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("veil: out of shared memory")))); } return result; } /** * Dynamically allocate a piece of shared memory from the current context. * * @param size The size of the requested piece of memory. * * @return Pointer to dynamically allocated memory. */ void * vl_shmalloc(size_t size) { MemContext *context; void *result; context = get_cur_context(); LWLockAcquire(VeilLWLock, LW_EXCLUSIVE); result = do_vl_shmalloc(context, size); LWLockRelease(VeilLWLock); return result; } /** * Free a piece of shared memory within the current context. Currently * this does nothing as implementation of freeing of shared memory has * been deferred. * * @param mem Pointer to the memory to be freed. * */ void vl_free(void *mem) { return; } /** * Attach to, creating and initialising as necessary, the shared memory * control structure. Record this for the session in shared_meminfo. 
*/ static void shmalloc_init(void) { VeilLWLock = AddinShmemInitLock; InitialLWLock = AddinShmemInitLock; if (!shared_meminfo) { VarEntry *var; MemContext *context0; MemContext *context1; bool found = false; HTAB *hash0; size_t size; size = veil_shmem_context_size(); LWLockAcquire(InitialLWLock, LW_EXCLUSIVE); context0 = get_shmem_context("VEIL_SHMEM0", size, &found); if (found && context0->memctl) { shared_meminfo = context0->memctl; VeilLWLock = shared_meminfo->veil_lwlock; /* By aquiring and releasing this lock, we ensure that Veil * shared memory has been fully initialised, by a process * following the else clause of this code path. */ LWLockAcquire(VeilLWLock, LW_EXCLUSIVE); LWLockRelease(InitialLWLock); LWLockRelease(VeilLWLock); } else { /* Do minimum amount of initialisation while holding * the initial lock. We don't want to do anything that * may cause other locks to be aquired as this could lead * to deadlock with other add-ins. Instead, we aquire the * Veil-specific lock before finishing the initialisation. */ shared_meminfo = do_vl_shmalloc(context0, sizeof(ShmemCtl)); if (context0->lwlock != InitialLWLock) { /* Re-use the LWLock previously allocated to this memory * context */ VeilLWLock = context0->lwlock; } else { /* Allocate new LWLock for this new shared memory * context */ VeilLWLock = NextLWLock(); } /* Record the lock id in context0 (for possible re-use if * the current database is dropped and a new veil-using * database created), and in the shared_meminfo struct */ context0->lwlock = VeilLWLock; shared_meminfo->veil_lwlock = VeilLWLock; /* Exchange the initial lock for our Veil-specific one. 
*/ LWLockAcquire(VeilLWLock, LW_EXCLUSIVE); LWLockRelease(InitialLWLock); /* Now do the rest of the Veil shared memory initialisation */ /* Set up the other memory context */ context1 = get_shmem_context("VEIL_SHMEM1", size, &found); /* Record location of shmemctl structure in each context */ context0->memctl = shared_meminfo; context1->memctl = shared_meminfo; /* Finish initialising the shmemctl structure */ shared_meminfo->type = OBJ_SHMEMCTL; shared_meminfo->current_context = 0; shared_meminfo->total_allocated[0] = size; shared_meminfo->total_allocated[1] = size; shared_meminfo->switching = false; shared_meminfo->context[0] = context0; shared_meminfo->context[1] = context1; shared_meminfo->xid[0] = GetCurrentTransactionId(); shared_meminfo->xid[1] = shared_meminfo->xid[0]; shared_meminfo->initialised = true; /* Set up both shared hashes */ hash0 = get_hash0(); (void) get_hash1(); /* Record the shmemctl structure in hash0 */ var = (VarEntry *) hash_search(hash0, (void *) "VEIL_SHMEMCTL", HASH_ENTER, &found); var->obj = (Object *) shared_meminfo; var->shared = true; var = (VarEntry *) hash_search(hash0, (void *) "VEIL_SHMEMCTL", HASH_ENTER, &found); LWLockRelease(VeilLWLock); } } } /** * Return the shared hash for the current context. * * @return Pointer to the HTAB for the current context's shared hash. */ HTAB * vl_get_shared_hash() { int context; HTAB *hash; static bool initialised = false; if (!initialised) { (void) get_cur_context(); /* Ensure shared memory is set up. */ initialised = true; } context = get_cur_context_id(); if (context == 0) { hash = get_hash0(); } else { hash = get_hash1(); } return hash; } /** * Reset one of the shared hashes. This is one of the final steps in a * context switch. * * @return hash The shared hash that is to be reset. 
*/ static void clear_hash(HTAB *hash) { static HASH_SEQ_STATUS status; VarEntry *var; hash_seq_init(&status, hash); while ((var = hash_seq_search(&status))) { if (strncmp("VEIL_SHMEMCTL", var->key, strlen("VEIL_SHMEMCTL")) != 0) { (void) hash_search(hash, var->key, HASH_REMOVE, NULL); } } } /** * Prepare for a switch to the alternate context. Switching will * only be allowed if there are no transactions that may still be using * the context to which we are switching, and there is no other * process attempting the switch. * * @return true if the switch preparation was successful. */ bool vl_prepare_context_switch() { int context_curidx; int context_newidx; HTAB *hash0 = get_hash0(); /* We must not attempt to create hashes * on the fly below as they also acquire * the lock */ HTAB *hash1 = get_hash1(); TransactionId oldest_xid; MemContext *context; (void) get_cur_context(); /* Ensure shared memory is set up */ LWLockAcquire(VeilLWLock, LW_EXCLUSIVE); if (shared_meminfo->switching) { /* Another process is performing the switch */ LWLockRelease(VeilLWLock); return false; } shared_meminfo->switching = true; /* We have claimed the switch. If we decide that we cannot proceed, * we will return it to its previous state. */ context_curidx = shared_meminfo->current_context; context_newidx = OTHER_CONTEXT(context_curidx); /* In case the alternate context has been used before, we must * clear it. */ oldest_xid = GetOldestXmin(false, true); if (TransactionIdPrecedes(oldest_xid, shared_meminfo->xid[context_curidx])) { /* There is a transaction running that precedes the time of * the last context switch. That transaction may still be * using the chunk to which we wish to switch. We cannot * allow the switch. */ shared_meminfo->switching = false; LWLockRelease(VeilLWLock); return false; } else { /* It looks like we can safely make the switch. Reset the * new context, and make it the current context for this * session only. 
*/ context = shared_meminfo->context[context_newidx]; context->next = sizeof(MemContext); /* If we are switching to context 0, reset the next field of * the first chunk to leave space for the ShmemCtl struct. */ if (context_newidx == 0) { context->next += sizeof(ShmemCtl); clear_hash(hash0); } else { clear_hash(hash1); } } LWLockRelease(VeilLWLock); prepared_for_switch = true; return true; } /** * Complete the context switch started by vl_prepare_context_switch(). * Raise an ERROR if the context switch cannot be completed. * * @return true if the context switch is successfully completed. */ bool vl_complete_context_switch() { int context_curidx; int context_newidx; if (!prepared_for_switch) { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("failed to complete context switch"), errdetail("Not prepared for switch - " "invalid state for operation"))); } LWLockAcquire(VeilLWLock, LW_EXCLUSIVE); context_curidx = shared_meminfo->current_context; context_newidx = OTHER_CONTEXT(context_curidx); if (!shared_meminfo->switching) { /* We do not claim to be switching. We should. */ LWLockRelease(VeilLWLock); ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("failed to complete context switch"), errdetail("Session does not have switching set to true- " "invalid state for operation"))); } shared_meminfo->switching = false; shared_meminfo->current_context = context_newidx; shared_meminfo->xid[context_newidx] = GetCurrentTransactionId(); LWLockRelease(VeilLWLock); prepared_for_switch = false; return true; } /** * In desparation, if we are unable to complete a context switch, we * should use this function. */ void vl_force_context_switch() { int context_curidx; int context_newidx; MemContext *context; HTAB *hash0 = get_hash0(); HTAB *hash1 = get_hash1(); (void) get_cur_context(); LWLockAcquire(VeilLWLock, LW_EXCLUSIVE); context_curidx = shared_meminfo->current_context; context_newidx = OTHER_CONTEXT(context_curidx); /* Clear the alternate context. 
*/ context = shared_meminfo->context[context_newidx]; context->next = sizeof(MemContext); /* If we are switching to context 0, reset the next field of * the first chunk to leave space for the ShmemCtl struct. */ if (context_newidx == 0) { context->next += sizeof(ShmemCtl); clear_hash(hash0); } else { clear_hash(hash1); } shared_meminfo->switching = false; shared_meminfo->current_context = context_newidx; shared_meminfo->xid[context_newidx] = GetCurrentTransactionId(); shared_meminfo->xid[0] = GetCurrentTransactionId(); LWLockRelease(VeilLWLock); prepared_for_switch = false; }