diff options
author | Teodor Sigaev | 2016-04-01 13:42:24 +0000 |
---|---|---|
committer | Teodor Sigaev | 2016-04-01 13:42:24 +0000 |
commit | 9ee014fc899a28a198492b074e32b60ed8915ea9 (patch) | |
tree | 107c5cdbac932b383645f94b531b9e0d5369476c /contrib/bloom/blutils.c | |
parent | 4e56e5a6de766a6983ce723b1945d68a4e098a06 (diff) |
Bloom index contrib module
Module provides new access method. It is actually a simple Bloom filter
implemented as pgsql's index. It could give some benefits on search
with large number of columns.
Module is a single way to test generic WAL interface committed earlier.
Author: Teodor Sigaev, Alexander Korotkov
Reviewers: Aleksander Alekseev, Michael Paquier, Jim Nasby
Diffstat (limited to 'contrib/bloom/blutils.c')
-rw-r--r-- | contrib/bloom/blutils.c | 463 |
1 files changed, 463 insertions, 0 deletions
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c new file mode 100644 index 00000000000..b86f51fb825 --- /dev/null +++ b/contrib/bloom/blutils.c @@ -0,0 +1,463 @@ +/*------------------------------------------------------------------------- + * + * blutils.c + * Bloom index utilities. + * + * Portions Copyright (c) 2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1990-1993, Regents of the University of California + * + * IDENTIFICATION + * contrib/bloom/blutils.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/amapi.h" +#include "access/generic_xlog.h" +#include "catalog/index.h" +#include "storage/lmgr.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/indexfsm.h" +#include "utils/memutils.h" +#include "access/reloptions.h" +#include "storage/freespace.h" +#include "storage/indexfsm.h" + +#include "bloom.h" + +/* Signature dealing macros */ +#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType)) +#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) ) +#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) ) +#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) ) +#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 ) + +PG_FUNCTION_INFO_V1(blhandler); + +/* Kind of relation optioms for bloom index */ +static relopt_kind bl_relopt_kind; + +static int32 myRand(); +static void mySrand(uint32 seed); + +/* + * Module initialize function: initilized relation options. + */ +void +_PG_init(void) +{ + int i; + char buf[16]; + + bl_relopt_kind = add_reloption_kind(); + + add_int_reloption(bl_relopt_kind, "length", + "Length of signature in uint16 type", 5, 1, 256); + + for (i = 0; i < INDEX_MAX_KEYS; i++) + { + snprintf(buf, 16, "col%d", i + 1); + add_int_reloption(bl_relopt_kind, buf, + "Number of bits for corresponding column", 2, 1, 2048); + } +} + +/* + * Bloom handler function: return IndexAmRoutine with access method parameters + * and callbacks. + */ +Datum +blhandler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 1; + amroutine->amsupport = 1; + amroutine->amcanorder = false; + amroutine->amcanorderbyop = false; + amroutine->amcanbackward = false; + amroutine->amcanunique = false; + amroutine->amcanmulticol = true; + amroutine->amoptionalkey = true; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = false; + amroutine->amstorage = false; + amroutine->amclusterable = false; + amroutine->ampredlocks = false; + amroutine->amkeytype = 0; + + amroutine->aminsert = blinsert; + amroutine->ambeginscan = blbeginscan; + amroutine->amgettuple = NULL; + amroutine->amgetbitmap = blgetbitmap; + amroutine->amrescan = blrescan; + amroutine->amendscan = blendscan; + amroutine->ammarkpos = NULL; + amroutine->amrestrpos = NULL; + amroutine->ambuild = blbuild; + amroutine->ambuildempty = blbuildempty; + amroutine->ambulkdelete = blbulkdelete; + amroutine->amvacuumcleanup = blvacuumcleanup; + amroutine->amcanreturn = NULL; + amroutine->amcostestimate = blcostestimate; + amroutine->amoptions = bloptions; + amroutine->amvalidate = blvalidate; + + PG_RETURN_POINTER(amroutine); +} + +/* + * Fill BloomState structure for particular index. + */ +void +initBloomState(BloomState *state, Relation index) +{ + int i; + + state->nColumns = index->rd_att->natts; + + /* Initialize hash function for each attribute */ + for (i = 0; i < index->rd_att->natts; i++) + { + fmgr_info_copy(&(state->hashFn[i]), + index_getprocinfo(index, i + 1, BLOOM_HASH_PROC), + CurrentMemoryContext); + } + + /* Initialize amcache if needed with options from metapage */ + if (!index->rd_amcache) + { + Buffer buffer; + Page page; + BloomMetaPageData *meta; + BloomOptions *opts; + + opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions)); + + buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + page = BufferGetPage(buffer); + + if (!BloomPageIsMeta(page)) + elog(ERROR, "Relation is not a bloom index"); + meta = BloomPageGetMeta(BufferGetPage(buffer)); + + if (meta->magickNumber != BLOOM_MAGICK_NUMBER) + elog(ERROR, "Relation is not a bloom index"); + + *opts = meta->opts; + + UnlockReleaseBuffer(buffer); + + index->rd_amcache = (void *) opts; + } + + state->opts = (BloomOptions *) index->rd_amcache; + state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ + + sizeof(SignType) * state->opts->bloomLength; +} + +/* + * Random generator copied from FreeBSD. Using own random generator here for + * two reasons: + * + * 1) In this case random numbers are used for on-disk storage. Usage of + * PostgreSQL number generator would obstruct it from all possible changes. + * 2) Changing seed of PostgreSQL random generator would be undesirable side + * effect. + */ +static int32 next; + +static int32 +myRand() +{ + /* + * Compute x = (7^5 * x) mod (2^31 - 1) + * without overflowing 31 bits: + * (2^31 - 1) = 127773 * (7^5) + 2836 + * From "Random number generators: good ones are hard to find", + * Park and Miller, Communications of the ACM, vol. 31, no. 10, + * October 1988, p. 1195. + */ + int32 hi, lo, x; + + /* Must be in [1, 0x7ffffffe] range at this point. */ + hi = next / 127773; + lo = next % 127773; + x = 16807 * lo - 2836 * hi; + if (x < 0) + x += 0x7fffffff; + next = x; + /* Transform to [0, 0x7ffffffd] range. */ + return (x - 1); +} + +void +mySrand(uint32 seed) +{ + next = seed; + /* Transform to [1, 0x7ffffffe] range. */ + next = (next % 0x7ffffffe) + 1; +} + +/* + * Add bits of given value to the signature. + */ +void +signValue(BloomState *state, SignType *sign, Datum value, int attno) +{ + uint32 hashVal; + int nBit, + j; + + /* + * init generator with "column's" number to get "hashed" seed for new + * value. We don't want to map the same numbers from different columns + * into the same bits! + */ + mySrand(attno); + + /* + * Init hash sequence to map our value into bits. the same values in + * different columns will be mapped into different bits because of step + * above + */ + hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value)); + mySrand(hashVal ^ myRand()); + + for (j = 0; j < state->opts->bitSize[attno]; j++) + { + /* prevent mutiple evaluation */ + nBit = myRand() % (state->opts->bloomLength * BITSIGNTYPE); + SETBIT(sign, nBit); + } +} + +/* + * Make bloom tuple from values. + */ +BloomTuple * +BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull) +{ + int i; + BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple); + + res->heapPtr = *iptr; + + /* Blooming each column */ + for (i = 0; i < state->nColumns; i++) + { + /* skip nulls */ + if (isnull[i]) + continue; + + signValue(state, res->sign, values[i], i); + } + + return res; +} + +/* + * Add new bloom tuple to the page. Returns true if new tuple was successfully + * added to the page. Returns false if it doesn't git the page. + */ +bool +BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple) +{ + BloomTuple *itup; + BloomPageOpaque opaque; + Pointer ptr; + + /* Does new tuple fit the page */ + if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple) + return false; + + /* Copy new tuple to the end of page */ + opaque = BloomPageGetOpaque(page); + itup = BloomPageGetTuple(state, page, opaque->maxoff + 1); + memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple); + + /* Adjust maxoff and pd_lower */ + opaque->maxoff++; + ptr = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1); + ((PageHeader) page)->pd_lower = ptr - page; + + return true; +} + +/* + * Allocate a new page (either by recycling, or by extending the index file) + * The returned buffer is already pinned and exclusive-locked + * Caller is responsible for initializing the page by calling BloomInitBuffer + */ +Buffer +BloomNewBuffer(Relation index) +{ + Buffer buffer; + bool needLock; + + /* First, try to get a page from FSM */ + for (;;) + { + BlockNumber blkno = GetFreeIndexPage(index); + + if (blkno == InvalidBlockNumber) + break; + + buffer = ReadBuffer(index, blkno); + + /* + * We have to guard against the possibility that someone else already + * recycled this page; the buffer may be locked if so. + */ + if (ConditionalLockBuffer(buffer)) + { + Page page = BufferGetPage(buffer); + + if (PageIsNew(page)) + return buffer; /* OK to use, if never initialized */ + + if (BloomPageIsDeleted(page)) + return buffer; /* OK to use */ + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + } + + /* Can't use it, so release buffer and try again */ + ReleaseBuffer(buffer); + } + + /* Must extend the file */ + needLock = !RELATION_IS_LOCAL(index); + if (needLock) + LockRelationForExtension(index, ExclusiveLock); + + buffer = ReadBuffer(index, P_NEW); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + if (needLock) + UnlockRelationForExtension(index, ExclusiveLock); + + return buffer; +} + +/* + * Initialize bloom page. + */ +void +BloomInitPage(Page page, uint16 flags) +{ + BloomPageOpaque opaque; + + PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData)); + + opaque = BloomPageGetOpaque(page); + memset(opaque, 0, sizeof(BloomPageOpaqueData)); + opaque->flags = flags; +} + +/* + * Adjust options of bloom index. + */ +static void +adjustBloomOptions(BloomOptions *opts) +{ + int i; + + /* Default length of bloom filter is 5 of 16-bit integers */ + if (opts->bloomLength <= 0) + opts->bloomLength = 5; + else + opts->bloomLength = opts->bloomLength; + + /* Check singnature length */ + for (i = 0; i < INDEX_MAX_KEYS; i++) + { + /* + * Zero and negative number of bits is meaningless. Also setting + * more bits than signature have seems useless. Replace both cases + * with 2 bits default. + */ + if (opts->bitSize[i] <= 0 + || opts->bitSize[i] >= opts->bloomLength * sizeof(SignType)) + opts->bitSize[i] = 2; + } +} + +/* + * Initialize metapage for bloom index. + */ +void +BloomInitMetapage(Relation index) +{ + Page metaPage; + Buffer metaBuffer; + BloomMetaPageData *metadata; + GenericXLogState *state; + + /* + * Make a new buffer, since it first buffer it should be associated with + * block number 0 (BLOOM_METAPAGE_BLKNO). + */ + metaBuffer = BloomNewBuffer(index); + Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO); + + /* Initialize bloom index options */ + if (!index->rd_options) + index->rd_options = palloc0(sizeof(BloomOptions)); + adjustBloomOptions((BloomOptions *) index->rd_options); + + /* Initialize contents of meta page */ + state = GenericXLogStart(index); + metaPage = GenericXLogRegister(state, metaBuffer, true); + + BloomInitPage(metaPage, BLOOM_META); + metadata = BloomPageGetMeta(metaPage); + memset(metadata, 0, sizeof(BloomMetaPageData)); + metadata->magickNumber = BLOOM_MAGICK_NUMBER; + metadata->opts = *((BloomOptions *) index->rd_options); + ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData); + + GenericXLogFinish(state); + UnlockReleaseBuffer(metaBuffer); +} + +/* + * Initialize options for bloom index. + */ +bytea * +bloptions(Datum reloptions, bool validate) +{ + relopt_value *options; + int numoptions; + BloomOptions *rdopts; + relopt_parse_elt tab[INDEX_MAX_KEYS + 1]; + int i; + char buf[16]; + + /* Option for length of signature */ + tab[0].optname = "length"; + tab[0].opttype = RELOPT_TYPE_INT; + tab[0].offset = offsetof(BloomOptions, bloomLength); + + /* Number of bits for each of possible columns: col1, col2, ... */ + for (i = 0; i < INDEX_MAX_KEYS; i++) + { + snprintf(buf, sizeof(buf), "col%d", i + 1); + tab[i + 1].optname = pstrdup(buf); + tab[i + 1].opttype = RELOPT_TYPE_INT; + tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]); + } + + options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions); + rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions); + fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions, + validate, tab, INDEX_MAX_KEYS + 1); + + adjustBloomOptions(rdopts); + + return (bytea *) rdopts; +} |