summaryrefslogtreecommitdiff
path: root/src/include/access
diff options
context:
space:
mode:
author Tomas Vondra 2025-03-03 15:53:03 +0000
committer Tomas Vondra 2025-03-03 15:53:06 +0000
commit 8492feb98f6df3f0f03e84ed56f0d1cbb2ac514c (patch)
tree 8b5775ca7cdb77a61c4ada41b45579e50bc3cf35 /src/include/access
parent 3f1db99bfabbb9d4afc41f362d9801512f4c7c65 (diff)
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE and BRIN. For large tables this may result in significant speedup when the build is CPU-bound. The work is divided so that each worker builds index entries on a subset of the table, determined by the regular parallel scan used to read the data. Each worker uses a local tuplesort to sort and merge the entries for the same key. The TID lists do not overlap (for a given key), which means the merge sort simply concatenates the two lists. The merged entries are written into a shared tuplesort for the leader. The leader needs to merge the sorted entries again, before writing them into the index. But this way a significant part of the work happens in the workers, and the leader is left with merging fewer large entries, which is more efficient. Most of the parallelism infrastructure is a simplified copy of the code used by BTREE indexes, omitting the parts irrelevant for GIN indexes (e.g. uniqueness checks). Original patch by me, with reviews and substantial improvements by Matthias van de Meent, certainly enough to make him a co-author. Author: Tomas Vondra, Matthias van de Meent Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
Diffstat (limited to 'src/include/access')
-rw-r--r-- src/include/access/gin.h 15
-rw-r--r-- src/include/access/gin_private.h 1
-rw-r--r-- src/include/access/gin_tuple.h 44
3 files changed, 60 insertions, 0 deletions
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 9ed48dfde4b..2e1076a0499 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -12,6 +12,8 @@
#include "access/xlogreader.h"
#include "lib/stringinfo.h"
+#include "nodes/execnodes.h"
+#include "storage/shm_toc.h"
#include "storage/block.h"
#include "utils/relcache.h"
@@ -37,6 +39,17 @@
#define GIN_SEARCH_MODE_EVERYTHING 3 /* for internal use only */
/*
+ * Constant definitions for progress reporting. Phase numbers must match
+ * ginbuildphasename.
+ *
+ * The GIN-specific phases are numbered consecutively, starting right after
+ * the generic subphase mentioned below.
+ */
+/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 (see progress.h) */
+#define PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN 2
+#define PROGRESS_GIN_PHASE_PERFORMSORT_1 3
+#define PROGRESS_GIN_PHASE_MERGE_1 4
+#define PROGRESS_GIN_PHASE_PERFORMSORT_2 5
+#define PROGRESS_GIN_PHASE_MERGE_2 6
+
+/*
* GinStatsData represents stats data for planner use
*/
typedef struct GinStatsData
@@ -88,4 +101,6 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
extern void ginUpdateStats(Relation index, const GinStatsData *stats,
bool is_build);
+extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc);
+
#endif /* GIN_H */
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 50478db9820..95d8805b66f 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -109,6 +109,7 @@ extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
GinNullCategory *category);
+extern char *ginbuildphasename(int64 phasenum);
/* gininsert.c */
extern IndexBuildResult *ginbuild(Relation heap, Relation index,
diff --git a/src/include/access/gin_tuple.h b/src/include/access/gin_tuple.h
new file mode 100644
index 00000000000..ce555031335
--- /dev/null
+++ b/src/include/access/gin_tuple.h
@@ -0,0 +1,44 @@
+/*--------------------------------------------------------------------------
+ * gin_tuple.h
+ * Declarations for GinTuple, the per-key entry format used by parallel
+ * GIN index builds.
+ *
+ * Copyright (c) 2006-2025, PostgreSQL Global Development Group
+ *
+ * src/include/access/gin_tuple.h
+ *--------------------------------------------------------------------------
+ */
+#ifndef GIN_TUPLE_H
+#define GIN_TUPLE_H
+
+#include "access/ginblock.h"
+#include "storage/itemptr.h"
+#include "utils/sortsupport.h"
+
+/*
+ * Data for one key in a GIN index.
+ *
+ * The fixed-size fields are followed by a variable-length payload in data[].
+ * The first keylen bytes of data[] hold the key value; GinTupleGetFirst()
+ * expects a GinPostingList at the next short-aligned address after the key.
+ */
+typedef struct GinTuple
+{
+ int tuplen; /* length of the whole tuple */
+ OffsetNumber attrnum; /* attnum of index key */
+ uint16 keylen; /* bytes in data for key value */
+ int16 typlen; /* typlen for key */
+ bool typbyval; /* typbyval for key */
+ signed char category; /* category: normal or NULL? */
+ int nitems; /* number of TIDs in the data */
+ char data[FLEXIBLE_ARRAY_MEMBER]; /* key bytes, then the posting list */
+} GinTuple;
+
+/*
+ * GinTupleGetFirst
+ * Return the address of the "first" item pointer in the tuple's
+ * posting list.
+ *
+ * The posting list is looked up at the first short-aligned address that
+ * follows the keylen bytes of key data at the start of tup->data.
+ */
+static inline ItemPointer
+GinTupleGetFirst(GinTuple *tup)
+{
+ char *keyend = tup->data + tup->keylen;
+ GinPostingList *postings = (GinPostingList *) SHORTALIGN(keyend);
+
+ return &postings->first;
+}
+
+extern int _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup);
+
+#endif /* GIN_TUPLE_H */