summaryrefslogtreecommitdiff
path: root/src/include/access
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/access')
-rw-r--r--src/include/access/gin_private.h212
1 files changed, 169 insertions, 43 deletions
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 5f0fe80e807..3f92c376d72 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -32,11 +32,8 @@
typedef struct GinPageOpaqueData
{
BlockNumber rightlink; /* next page if any */
- OffsetNumber maxoff; /* number entries on GIN_DATA page: number of
- * heap ItemPointers on GIN_DATA|GIN_LEAF page
- * or number of PostingItems on GIN_DATA &
- * ~GIN_LEAF page. On GIN_LIST page, number of
- * heap tuples. */
+ OffsetNumber maxoff; /* number of PostingItems on GIN_DATA & ~GIN_LEAF page.
+ * On GIN_LIST page, number of heap tuples. */
uint16 flags; /* see bit definitions below */
} GinPageOpaqueData;
@@ -49,6 +46,7 @@ typedef GinPageOpaqueData *GinPageOpaque;
#define GIN_LIST (1 << 4)
#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
#define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not updated */
+#define GIN_COMPRESSED (1 << 7)
/* Page numbers of fixed-location pages */
#define GIN_METAPAGE_BLKNO (0)
@@ -88,7 +86,12 @@ typedef struct GinMetaPageData
* GIN version number (ideally this should have been at the front, but too
* late now. Don't move it!)
*
- * Currently 1 (for indexes initialized in 9.1 or later)
+ * Currently 2 (for indexes initialized in 9.4 or later)
+ *
+ * Version 1 (indexes initialized in version 9.1, 9.2 or 9.3), is
+ * compatible, but may contain uncompressed posting tree (leaf) pages and
+ * posting lists. They will be converted to compressed format when
+ * modified.
*
* Version 0 (indexes initialized in 9.0 or before) is compatible but may
* be missing null entries, including both null keys and placeholders.
@@ -97,7 +100,7 @@ typedef struct GinMetaPageData
int32 ginVersion;
} GinMetaPageData;
-#define GIN_CURRENT_VERSION 1
+#define GIN_CURRENT_VERSION 2
#define GinPageGetMeta(p) \
((GinMetaPageData *) PageGetContents(p))
@@ -116,6 +119,8 @@ typedef struct GinMetaPageData
#define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST )
#define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
+#define GinPageIsCompressed(page) ( GinPageGetOpaque(page)->flags & GIN_COMPRESSED )
+#define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED )
#define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
@@ -213,13 +218,16 @@ typedef signed char GinNullCategory;
#define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) )
#define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
-#define GinGetPostingOffset(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
-#define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,n)
-#define GinGetPosting(itup) ((ItemPointer) ((char*)(itup) + GinGetPostingOffset(itup)))
+#define GIN_ITUP_COMPRESSED (1 << 31)
+#define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED))
+#define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED)
+#define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup)))
+#define GinItupIsCompressed(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED)
#define GinMaxItemSize \
- MAXALIGN_DOWN(((BLCKSZ - SizeOfPageHeaderData - \
- MAXALIGN(sizeof(GinPageOpaqueData))) / 3 - sizeof(ItemIdData)))
+ Min(INDEX_SIZE_MASK, \
+ MAXALIGN_DOWN(((BLCKSZ - SizeOfPageHeaderData - \
+ MAXALIGN(sizeof(GinPageOpaqueData))) / 6 - sizeof(ItemIdData))))
/*
* Access macros for non-leaf entry tuples
@@ -230,30 +238,59 @@ typedef signed char GinNullCategory;
/*
* Data (posting tree) pages
+ *
+ * Posting tree pages don't store regular tuples. Non-leaf pages contain
+ * PostingItems, which are pairs of ItemPointers and child block numbers.
+ * Leaf pages contain GinPostingLists and an uncompressed array of item
+ * pointers.
+ *
+ * In a leaf page, the compressed posting lists are stored after the regular
+ * page header, one after each other. Although we don't store regular tuples,
+ * pd_lower is used to indicate the end of the posting lists. After that, free
+ * space follows. This layout is compatible with the "standard" heap and
+ * index page layout described in bufpage.h, so that we can e.g set buffer_std
+ * when writing WAL records.
+ *
+ * In the special space is the GinPageOpaque struct.
*/
+#define GinDataLeafPageGetPostingList(page) \
+ (GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))))
+#define GinDataLeafPageGetPostingListSize(page) \
+ (((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData)))
+#define GinDataLeafPageSetPostingListSize(page, size) \
+ { \
+ Assert(size <= GinDataLeafMaxContentSize); \
+ ((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
+ }
+
+#define GinDataLeafPageIsEmpty(page) \
+ (GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber))
+
+#define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page)
+
#define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page))
+/*
+ * Pointer to the data portion of a posting tree page. For internal pages,
+ * that's the beginning of the array of PostingItems. For compressed leaf
+ * pages, the first compressed posting list. For uncompressed (pre-9.4) leaf
+ * pages, it's the beginning of the ItemPointer array.
+ */
#define GinDataPageGetData(page) \
(PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))
/* non-leaf pages contain PostingItems */
#define GinDataPageGetPostingItem(page, i) \
((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem)))
-/* leaf pages contain ItemPointers */
-#define GinDataPageGetItemPointer(page, i) \
- ((ItemPointer) (GinDataPageGetData(page) + ((i)-1) * sizeof(ItemPointerData)))
-#define GinSizeOfDataPageItem(page) \
- (GinPageIsLeaf(page) ? sizeof(ItemPointerData) : sizeof(PostingItem))
-#define GinDataPageGetFreeSpace(page) \
+#define GinNonLeafDataPageGetFreeSpace(page) \
(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
- MAXALIGN(sizeof(ItemPointerData)) \
- - GinPageGetOpaque(page)->maxoff * GinSizeOfDataPageItem(page) \
+ - GinPageGetOpaque(page)->maxoff * sizeof(PostingItem) \
- MAXALIGN(sizeof(GinPageOpaqueData)))
-#define GinMaxLeafDataItems \
- ((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
- MAXALIGN(sizeof(ItemPointerData)) - \
- MAXALIGN(sizeof(GinPageOpaqueData))) \
- / sizeof(ItemPointerData))
+#define GinDataLeafMaxContentSize \
+ (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
+ - MAXALIGN(sizeof(ItemPointerData)) \
+ - MAXALIGN(sizeof(GinPageOpaqueData)))
/*
* List pages
@@ -318,6 +355,23 @@ typedef struct GinState
Oid supportCollation[INDEX_MAX_KEYS];
} GinState;
+
+/*
+ * A compressed posting list.
+ *
+ * Note: This requires 2-byte alignment.
+ */
+typedef struct
+{
+ ItemPointerData first; /* first item in this posting list (unpacked) */
+ uint16 nbytes; /* number of bytes that follow */
+ unsigned char bytes[1]; /* varbyte encoded items (variable length) */
+} GinPostingList;
+
+#define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) )
+#define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur))))
+
+
/* XLog stuff */
#define XLOG_GIN_CREATE_INDEX 0x00
@@ -328,18 +382,21 @@ typedef struct ginxlogCreatePostingTree
{
RelFileNode node;
BlockNumber blkno;
- uint32 nitem;
- /* follows list of heap's ItemPointer */
+ uint32 size;
+ /* A compressed posting list follows */
} ginxlogCreatePostingTree;
#define XLOG_GIN_INSERT 0x20
-typedef struct ginxlogInsert
+/*
+ * The format of the insertion record varies depending on the page type.
+ * ginxlogInsert is the common part between all variants.
+ */
+typedef struct
{
RelFileNode node;
BlockNumber blkno;
uint16 flags; /* GIN_SPLIT_ISLEAF and/or GIN_SPLIT_ISDATA */
- OffsetNumber offset;
/*
* FOLLOWS:
@@ -358,17 +415,25 @@ typedef struct ginxlogInsert
typedef struct
{
+ OffsetNumber offset;
bool isDelete;
IndexTupleData tuple; /* variable length */
} ginxlogInsertEntry;
typedef struct
{
- OffsetNumber nitem;
- ItemPointerData items[1]; /* variable length */
-} ginxlogInsertDataLeaf;
+ uint16 length;
+ uint16 unmodifiedsize;
-/* In an insert to an internal data page, the payload is a PostingItem */
+ /* compressed segments, variable length */
+ char newdata[1];
+} ginxlogRecompressDataLeaf;
+
+typedef struct
+{
+ OffsetNumber offset;
+ PostingItem newitem;
+} ginxlogInsertDataInternal;
#define XLOG_GIN_SPLIT 0x30
@@ -403,23 +468,56 @@ typedef struct
typedef struct
{
+ uint16 lsize;
+ uint16 rsize;
+ ItemPointerData lrightbound; /* new right bound of left page */
+ ItemPointerData rrightbound; /* new right bound of right page */
+
+ /* FOLLOWS: new compressed posting lists of left and right page */
+ char newdata[1];
+} ginxlogSplitDataLeaf;
+
+typedef struct
+{
OffsetNumber separator;
OffsetNumber nitem;
ItemPointerData rightbound;
- /* FOLLOWS: array of ItemPointers (for leaf) or PostingItems (non-leaf) */
-} ginxlogSplitData;
+ /* FOLLOWS: array of PostingItems */
+} ginxlogSplitDataInternal;
+/*
+ * Vacuum simply WAL-logs the whole page, when anything is modified. This
+ * functionally identical heap_newpage records, but is kept separate for
+ * debugging purposes. (When inspecting the WAL stream, it's easier to see
+ * what's going on when GIN vacuum records are marked as such, not as heap
+ * records.) This is currently only used for entry tree leaf pages.
+ */
#define XLOG_GIN_VACUUM_PAGE 0x40
typedef struct ginxlogVacuumPage
{
RelFileNode node;
BlockNumber blkno;
- OffsetNumber nitem;
- /* follows content of page */
+ uint16 hole_offset; /* number of bytes before "hole" */
+ uint16 hole_length; /* number of bytes in "hole" */
+ /* entire page contents (minus the hole) follow at end of record */
} ginxlogVacuumPage;
+/*
+ * Vacuuming posting tree leaf page is WAL-logged like recompression caused
+ * by insertion.
+ */
+#define XLOG_GIN_VACUUM_DATA_LEAF_PAGE 0x90
+
+typedef struct ginxlogVacuumDataLeafPage
+{
+ RelFileNode node;
+ BlockNumber blkno;
+
+ ginxlogRecompressDataLeaf data;
+} ginxlogVacuumDataLeafPage;
+
#define XLOG_GIN_DELETE_PAGE 0x50
typedef struct ginxlogDeletePage
@@ -506,6 +604,7 @@ typedef struct GinBtreeStack
BlockNumber blkno;
Buffer buffer;
OffsetNumber off;
+ ItemPointerData iptr;
/* predictNumber contains predicted number of pages on current level */
uint32 predictNumber;
struct GinBtreeStack *parent;
@@ -513,6 +612,14 @@ typedef struct GinBtreeStack
typedef struct GinBtreeData *GinBtree;
+/* Return codes for GinBtreeData.placeToPage method */
+typedef enum
+{
+ UNMODIFIED,
+ INSERTED,
+ SPLIT
+} GinPlaceToPageRC;
+
typedef struct GinBtreeData
{
/* search methods */
@@ -523,8 +630,7 @@ typedef struct GinBtreeData
/* insert methods */
OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
- bool (*placeToPage) (GinBtree, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
- Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, void *, BlockNumber, XLogRecData **);
+ GinPlaceToPageRC (*placeToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, XLogRecData **, Page *, Page *);
void *(*prepareDownlink) (GinBtree, Buffer);
void (*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
@@ -577,14 +683,17 @@ extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
/* ginentrypage.c */
extern IndexTuple GinFormTuple(GinState *ginstate,
OffsetNumber attnum, Datum key, GinNullCategory category,
- ItemPointerData *ipd, uint32 nipd, bool errorTooBig);
-extern void GinShortenTuple(IndexTuple itup, uint32 nipd);
+ Pointer data, Size dataSize, int nipd, bool errorTooBig);
extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
Datum key, GinNullCategory category,
GinState *ginstate);
extern void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
+extern ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum,
+ IndexTuple itup, int *nitems);
/* gindatapage.c */
+extern ItemPointer GinDataLeafPageGetItems(Page page, int *nitems);
+extern int GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm);
extern BlockNumber createPostingTree(Relation index,
ItemPointerData *items, uint32 nitems,
GinStatsData *buildStats);
@@ -598,6 +707,15 @@ extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBl
extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
+/*
+ * This is declared in ginvacuum.c, but is passed between ginVacuumItemPointers
+ * and ginVacuumPostingTreeLeaf and as an opaque struct, so we need a forward
+ * declaration for it.
+ */
+typedef struct GinVacuumState GinVacuumState;
+
+extern void ginVacuumPostingTreeLeaf(Relation rel, Buffer buf, GinVacuumState *gvs);
+
/* ginscan.c */
/*
@@ -679,7 +797,7 @@ typedef struct GinScanEntryData
/* used for Posting list and one page in Posting tree */
ItemPointerData *list;
- uint32 nlist;
+ int nlist;
OffsetNumber offset;
bool isFinished;
@@ -717,6 +835,8 @@ extern Datum gingetbitmap(PG_FUNCTION_ARGS);
/* ginvacuum.c */
extern Datum ginbulkdelete(PG_FUNCTION_ARGS);
extern Datum ginvacuumcleanup(PG_FUNCTION_ARGS);
+extern ItemPointer ginVacuumItemPointers(GinVacuumState *gvs,
+ ItemPointerData *items, int nitem, int *nremaining);
/* ginbulk.c */
typedef struct GinEntryAccumulator
@@ -770,11 +890,17 @@ extern void ginInsertCleanup(GinState *ginstate,
bool vac_delay, IndexBulkDeleteResult *stats);
/* ginpostinglist.c */
-extern uint32 ginMergeItemPointers(ItemPointerData *dst,
+
+extern GinPostingList *ginCompressPostingList(const ItemPointer ptrs, int nptrs,
+ int maxsize, int *nwritten);
+extern int ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int totalsize, TIDBitmap *tbm);
+
+extern ItemPointer ginPostingListDecodeAllSegments(GinPostingList *ptr, int len, int *ndecoded);
+extern ItemPointer ginPostingListDecode(GinPostingList *ptr, int *ndecoded);
+extern int ginMergeItemPointers(ItemPointerData *dst,
ItemPointerData *a, uint32 na,
ItemPointerData *b, uint32 nb);
-
/*
* Merging the results of several gin scans compares item pointers a lot,
* so we want this to be inlined. But if the compiler doesn't support that,