diff options
Diffstat (limited to 'src/include/access')
| -rw-r--r-- | src/include/access/gin_private.h | 212 |
1 files changed, 169 insertions, 43 deletions
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 5f0fe80e807..3f92c376d72 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -32,11 +32,8 @@ typedef struct GinPageOpaqueData { BlockNumber rightlink; /* next page if any */ - OffsetNumber maxoff; /* number entries on GIN_DATA page: number of - * heap ItemPointers on GIN_DATA|GIN_LEAF page - * or number of PostingItems on GIN_DATA & - * ~GIN_LEAF page. On GIN_LIST page, number of - * heap tuples. */ + OffsetNumber maxoff; /* number of PostingItems on GIN_DATA & ~GIN_LEAF page. + * On GIN_LIST page, number of heap tuples. */ uint16 flags; /* see bit definitions below */ } GinPageOpaqueData; @@ -49,6 +46,7 @@ typedef GinPageOpaqueData *GinPageOpaque; #define GIN_LIST (1 << 4) #define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */ #define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not updated */ +#define GIN_COMPRESSED (1 << 7) /* Page numbers of fixed-location pages */ #define GIN_METAPAGE_BLKNO (0) @@ -88,7 +86,12 @@ typedef struct GinMetaPageData * GIN version number (ideally this should have been at the front, but too * late now. Don't move it!) * - * Currently 1 (for indexes initialized in 9.1 or later) + * Currently 2 (for indexes initialized in 9.4 or later) + * + * Version 1 (indexes initialized in version 9.1, 9.2 or 9.3), is + * compatible, but may contain uncompressed posting tree (leaf) pages and + * posting lists. They will be converted to compressed format when + * modified. * * Version 0 (indexes initialized in 9.0 or before) is compatible but may * be missing null entries, including both null keys and placeholders. @@ -97,7 +100,7 @@ typedef struct GinMetaPageData int32 ginVersion; } GinMetaPageData; -#define GIN_CURRENT_VERSION 1 +#define GIN_CURRENT_VERSION 2 #define GinPageGetMeta(p) \ ((GinMetaPageData *) PageGetContents(p)) @@ -116,6 +119,8 @@ typedef struct GinMetaPageData #define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST ) #define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW ) #define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW ) +#define GinPageIsCompressed(page) ( GinPageGetOpaque(page)->flags & GIN_COMPRESSED ) +#define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED ) #define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED) #define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED) @@ -213,13 +218,16 @@ typedef signed char GinNullCategory; #define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) ) #define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) -#define GinGetPostingOffset(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) -#define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,n) -#define GinGetPosting(itup) ((ItemPointer) ((char*)(itup) + GinGetPostingOffset(itup))) +#define GIN_ITUP_COMPRESSED (1 << 31) +#define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED)) +#define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED) +#define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup))) +#define GinItupIsCompressed(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED) #define GinMaxItemSize \ - MAXALIGN_DOWN(((BLCKSZ - SizeOfPageHeaderData - \ - MAXALIGN(sizeof(GinPageOpaqueData))) / 3 - sizeof(ItemIdData))) + Min(INDEX_SIZE_MASK, \ + MAXALIGN_DOWN(((BLCKSZ - SizeOfPageHeaderData - \ + MAXALIGN(sizeof(GinPageOpaqueData))) / 6 - sizeof(ItemIdData)))) /* * Access macros for non-leaf entry tuples @@ -230,30 +238,59 @@ typedef signed char GinNullCategory; /* * Data (posting tree) pages + * + * Posting tree pages don't store regular tuples. Non-leaf pages contain + * PostingItems, which are pairs of ItemPointers and child block numbers. + * Leaf pages contain GinPostingLists and an uncompressed array of item + * pointers. + * + * In a leaf page, the compressed posting lists are stored after the regular + * page header, one after each other. Although we don't store regular tuples, + * pd_lower is used to indicate the end of the posting lists. After that, free + * space follows. This layout is compatible with the "standard" heap and + * index page layout described in bufpage.h, so that we can e.g set buffer_std + * when writing WAL records. + * + * In the special space is the GinPageOpaque struct. */ +#define GinDataLeafPageGetPostingList(page) \ + (GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))) +#define GinDataLeafPageGetPostingListSize(page) \ + (((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData))) +#define GinDataLeafPageSetPostingListSize(page, size) \ + { \ + Assert(size <= GinDataLeafMaxContentSize); \ + ((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \ + } + +#define GinDataLeafPageIsEmpty(page) \ + (GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)) + +#define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page) + #define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page)) +/* + * Pointer to the data portion of a posting tree page. For internal pages, + * that's the beginning of the array of PostingItems. For compressed leaf + * pages, the first compressed posting list. For uncompressed (pre-9.4) leaf + * pages, it's the beginning of the ItemPointer array. + */ #define GinDataPageGetData(page) \ (PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))) /* non-leaf pages contain PostingItems */ #define GinDataPageGetPostingItem(page, i) \ ((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem))) -/* leaf pages contain ItemPointers */ -#define GinDataPageGetItemPointer(page, i) \ - ((ItemPointer) (GinDataPageGetData(page) + ((i)-1) * sizeof(ItemPointerData))) -#define GinSizeOfDataPageItem(page) \ - (GinPageIsLeaf(page) ? sizeof(ItemPointerData) : sizeof(PostingItem)) -#define GinDataPageGetFreeSpace(page) \ +#define GinNonLeafDataPageGetFreeSpace(page) \ (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \ - MAXALIGN(sizeof(ItemPointerData)) \ - - GinPageGetOpaque(page)->maxoff * GinSizeOfDataPageItem(page) \ + - GinPageGetOpaque(page)->maxoff * sizeof(PostingItem) \ - MAXALIGN(sizeof(GinPageOpaqueData))) -#define GinMaxLeafDataItems \ - ((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \ - MAXALIGN(sizeof(ItemPointerData)) - \ - MAXALIGN(sizeof(GinPageOpaqueData))) \ - / sizeof(ItemPointerData)) +#define GinDataLeafMaxContentSize \ + (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \ + - MAXALIGN(sizeof(ItemPointerData)) \ + - MAXALIGN(sizeof(GinPageOpaqueData))) /* * List pages @@ -318,6 +355,23 @@ typedef struct GinState Oid supportCollation[INDEX_MAX_KEYS]; } GinState; + +/* + * A compressed posting list. + * + * Note: This requires 2-byte alignment. + */ +typedef struct +{ + ItemPointerData first; /* first item in this posting list (unpacked) */ + uint16 nbytes; /* number of bytes that follow */ + unsigned char bytes[1]; /* varbyte encoded items (variable length) */ +} GinPostingList; + +#define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) ) +#define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur)))) + + /* XLog stuff */ #define XLOG_GIN_CREATE_INDEX 0x00 @@ -328,18 +382,21 @@ typedef struct ginxlogCreatePostingTree { RelFileNode node; BlockNumber blkno; - uint32 nitem; - /* follows list of heap's ItemPointer */ + uint32 size; + /* A compressed posting list follows */ } ginxlogCreatePostingTree; #define XLOG_GIN_INSERT 0x20 -typedef struct ginxlogInsert +/* + * The format of the insertion record varies depending on the page type. + * ginxlogInsert is the common part between all variants. + */ +typedef struct { RelFileNode node; BlockNumber blkno; uint16 flags; /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */ - OffsetNumber offset; /* * FOLLOWS: @@ -358,17 +415,25 @@ typedef struct ginxlogInsert typedef struct { + OffsetNumber offset; bool isDelete; IndexTupleData tuple; /* variable length */ } ginxlogInsertEntry; typedef struct { - OffsetNumber nitem; - ItemPointerData items[1]; /* variable length */ -} ginxlogInsertDataLeaf; + uint16 length; + uint16 unmodifiedsize; -/* In an insert to an internal data page, the payload is a PostingItem */ + /* compressed segments, variable length */ + char newdata[1]; +} ginxlogRecompressDataLeaf; + +typedef struct +{ + OffsetNumber offset; + PostingItem newitem; +} ginxlogInsertDataInternal; #define XLOG_GIN_SPLIT 0x30 @@ -403,23 +468,56 @@ typedef struct typedef struct { + uint16 lsize; + uint16 rsize; + ItemPointerData lrightbound; /* new right bound of left page */ + ItemPointerData rrightbound; /* new right bound of right page */ + + /* FOLLOWS: new compressed posting lists of left and right page */ + char newdata[1]; +} ginxlogSplitDataLeaf; + +typedef struct +{ OffsetNumber separator; OffsetNumber nitem; ItemPointerData rightbound; - /* FOLLOWS: array of ItemPointers (for leaf) or PostingItems (non-leaf) */ -} ginxlogSplitData; + /* FOLLOWS: array of PostingItems */ +} ginxlogSplitDataInternal; +/* + * Vacuum simply WAL-logs the whole page, when anything is modified. This + * functionally identical heap_newpage records, but is kept separate for + * debugging purposes. (When inspecting the WAL stream, it's easier to see + * what's going on when GIN vacuum records are marked as such, not as heap + * records.) This is currently only used for entry tree leaf pages. + */ #define XLOG_GIN_VACUUM_PAGE 0x40 typedef struct ginxlogVacuumPage { RelFileNode node; BlockNumber blkno; - OffsetNumber nitem; - /* follows content of page */ + uint16 hole_offset; /* number of bytes before "hole" */ + uint16 hole_length; /* number of bytes in "hole" */ + /* entire page contents (minus the hole) follow at end of record */ } ginxlogVacuumPage; +/* + * Vacuuming posting tree leaf page is WAL-logged like recompression caused + * by insertion. + */ +#define XLOG_GIN_VACUUM_DATA_LEAF_PAGE 0x90 + +typedef struct ginxlogVacuumDataLeafPage +{ + RelFileNode node; + BlockNumber blkno; + + ginxlogRecompressDataLeaf data; +} ginxlogVacuumDataLeafPage; + #define XLOG_GIN_DELETE_PAGE 0x50 typedef struct ginxlogDeletePage @@ -506,6 +604,7 @@ typedef struct GinBtreeStack BlockNumber blkno; Buffer buffer; OffsetNumber off; + ItemPointerData iptr; /* predictNumber contains predicted number of pages on current level */ uint32 predictNumber; struct GinBtreeStack *parent; @@ -513,6 +612,14 @@ typedef struct GinBtreeStack typedef struct GinBtreeData *GinBtree; +/* Return codes for GinBtreeData.placeToPage method */ +typedef enum +{ + UNMODIFIED, + INSERTED, + SPLIT +} GinPlaceToPageRC; + typedef struct GinBtreeData { /* search methods */ @@ -523,8 +630,7 @@ typedef struct GinBtreeData /* insert methods */ OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber); - bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **); - Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **); + GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, XLogRecData **, Page *, Page *); void *(*prepareDownlink) (GinBtree, Buffer); void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page); @@ -577,14 +683,17 @@ extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack, /* ginentrypage.c */ extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, GinNullCategory category, - ItemPointerData *ipd, uint32 nipd, bool errorTooBig); -extern void GinShortenTuple(IndexTuple itup, uint32 nipd); + Pointer data, Size dataSize, int nipd, bool errorTooBig); extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, Datum key, GinNullCategory category, GinState *ginstate); extern void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage); +extern ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum, + IndexTuple itup, int *nitems); /* gindatapage.c */ +extern ItemPointer GinDataLeafPageGetItems(Page page, int *nitems); +extern int GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm); extern BlockNumber createPostingTree(Relation index, ItemPointerData *items, uint32 nitems, GinStatsData *buildStats); @@ -598,6 +707,15 @@ extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBl extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage); extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno); +/* + * This is declared in ginvacuum.c, but is passed between ginVacuumItemPointers + * and ginVacuumPostingTreeLeaf and as an opaque struct, so we need a forward + * declaration for it. + */ +typedef struct GinVacuumState GinVacuumState; + +extern void ginVacuumPostingTreeLeaf(Relation rel, Buffer buf, GinVacuumState *gvs); + /* ginscan.c */ /* @@ -679,7 +797,7 @@ typedef struct GinScanEntryData /* used for Posting list and one page in Posting tree */ ItemPointerData *list; - uint32 nlist; + int nlist; OffsetNumber offset; bool isFinished; @@ -717,6 +835,8 @@ extern Datum gingetbitmap(PG_FUNCTION_ARGS); /* ginvacuum.c */ extern Datum ginbulkdelete(PG_FUNCTION_ARGS); extern Datum ginvacuumcleanup(PG_FUNCTION_ARGS); +extern ItemPointer ginVacuumItemPointers(GinVacuumState *gvs, + ItemPointerData *items, int nitem, int *nremaining); /* ginbulk.c */ typedef struct GinEntryAccumulator @@ -770,11 +890,17 @@ extern void ginInsertCleanup(GinState *ginstate, bool vac_delay, IndexBulkDeleteResult *stats); /* ginpostinglist.c */ -extern uint32 ginMergeItemPointers(ItemPointerData *dst, + +extern GinPostingList *ginCompressPostingList(const ItemPointer ptrs, int nptrs, + int maxsize, int *nwritten); +extern int ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int totalsize, TIDBitmap *tbm); + +extern ItemPointer ginPostingListDecodeAllSegments(GinPostingList *ptr, int len, int *ndecoded); +extern ItemPointer ginPostingListDecode(GinPostingList *ptr, int *ndecoded); +extern int ginMergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb); - /* * Merging the results of several gin scans compares item pointers a lot, * so we want this to be inlined. But if the compiler doesn't support that, |
