From 2a6368343ff43743ddd90d0f4c2d0ac03e18aa85 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 19 Sep 2018 01:54:10 +0300 Subject: Add support for nearest-neighbor (KNN) searches to SP-GiST Currently, KNN searches were supported only by GiST. SP-GiST also capable to support them. This commit implements that support. SP-GiST scan stack is replaced with queue, which serves as stack if no ordering is specified. KNN support is provided for three SP-GIST opclasses: quad_point_ops, kd_point_ops and poly_ops (catversion is bumped). Some common parts between GiST and SP-GiST KNNs are extracted into separate functions. Discussion: https://postgr.es/m/570825e8-47d0-4732-2bf6-88d67d2d51c8%40postgrespro.ru Author: Nikita Glukhov, Alexander Korotkov based on GSoC work by Vlad Sterzhanov Review: Andrey Borodin, Alexander Korotkov --- src/include/access/genam.h | 3 +++ src/include/access/spgist.h | 13 ++++++++-- src/include/access/spgist_private.h | 48 ++++++++++++++++++++++++++++++++++--- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_amop.dat | 12 ++++++++++ src/include/utils/lsyscache.h | 3 +++ 6 files changed, 75 insertions(+), 6 deletions(-) (limited to 'src/include') diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 24c720bf421..534fac7bf2f 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -174,6 +174,9 @@ extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum); extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum); +extern void index_store_float8_orderby_distances(IndexScanDesc scan, + Oid *orderByTypes, double *distances, + bool recheckOrderBy); /* * index access method support routines (in genam.c) diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h index c6d7e22a389..9c19e9e6382 100644 --- a/src/include/access/spgist.h +++ b/src/include/access/spgist.h @@ -136,7 +136,10 @@ typedef struct spgPickSplitOut typedef struct spgInnerConsistentIn { ScanKey scankeys; /* array of operators and comparison values */ - int nkeys; /* length of array */ + ScanKey orderbys; /* array of ordering operators and comparison + * values */ + int nkeys; /* length of scankeys array */ + int norderbys; /* length of orderbys array */ Datum reconstructedValue; /* value reconstructed at parent */ void *traversalValue; /* opclass-specific traverse value */ @@ -159,6 +162,7 @@ typedef struct spgInnerConsistentOut int *levelAdds; /* increment level by this much for each */ Datum *reconstructedValues; /* associated reconstructed values */ void **traversalValues; /* opclass-specific traverse values */ + double **distances; /* associated distances */ } spgInnerConsistentOut; /* @@ -167,7 +171,10 @@ typedef struct spgInnerConsistentOut typedef struct spgLeafConsistentIn { ScanKey scankeys; /* array of operators and comparison values */ - int nkeys; /* length of array */ + ScanKey orderbys; /* array of ordering operators and comparison + * values */ + int nkeys; /* length of scankeys array */ + int norderbys; /* length of orderbys array */ Datum reconstructedValue; /* value reconstructed at parent */ void *traversalValue; /* opclass-specific traverse value */ @@ -181,6 +188,8 @@ typedef struct spgLeafConsistentOut { Datum leafValue; /* reconstructed original data, if any */ bool recheck; /* set true if operator must be rechecked */ + bool recheckDistances; /* set true if distances must be rechecked */ + double *distances; /* associated distances */ } spgLeafConsistentOut; diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h index 99365c8a45d..d23862ea71d 100644 --- a/src/include/access/spgist_private.h +++ b/src/include/access/spgist_private.h @@ -18,6 +18,7 @@ #include "access/spgist.h" #include "nodes/tidbitmap.h" #include "storage/buf.h" +#include "utils/geo_decls.h" #include "utils/relcache.h" @@ -130,14 +131,35 @@ typedef struct SpGistState bool isBuild; /* true if doing index build */ } SpGistState; +typedef struct SpGistSearchItem +{ + pairingheap_node phNode; /* pairing heap node */ + Datum value; /* value reconstructed from parent or + * leafValue if heaptuple */ + void *traversalValue; /* opclass-specific traverse value */ + int level; /* level of items on this page */ + ItemPointerData heapPtr; /* heap info, if heap tuple */ + bool isNull; /* SearchItem is NULL item */ + bool isLeaf; /* SearchItem is heap item */ + bool recheck; /* qual recheck is needed */ + bool recheckDistances; /* distance recheck is needed */ + + /* array with numberOfOrderBys entries */ + double distances[FLEXIBLE_ARRAY_MEMBER]; +} SpGistSearchItem; + +#define SizeOfSpGistSearchItem(n_distances) \ + (offsetof(SpGistSearchItem, distances) + sizeof(double) * (n_distances)) + /* * Private state of an index scan */ typedef struct SpGistScanOpaqueData { SpGistState state; /* see above */ + pairingheap *scanQueue; /* queue of to be visited items */ MemoryContext tempCxt; /* short-lived memory context */ - MemoryContext traversalCxt; /* memory context for traversalValues */ + MemoryContext traversalCxt; /* single scan lifetime memory context */ /* Control flags showing whether to search nulls and/or non-nulls */ bool searchNulls; /* scan matches (all) null entries */ @@ -146,9 +168,18 @@ typedef struct SpGistScanOpaqueData /* Index quals to be passed to opclass (null-related quals removed) */ int numberOfKeys; /* number of index qualifier conditions */ ScanKey keyData; /* array of index qualifier descriptors */ + int numberOfOrderBys; /* number of ordering operators */ + ScanKey orderByData; /* array of ordering op descriptors */ + Oid *orderByTypes; /* array of ordering op return types */ + Oid indexCollation; /* collation of index column */ - /* Stack of yet-to-be-visited pages */ - List *scanStack; /* List of ScanStackEntrys */ + /* Opclass defined functions: */ + FmgrInfo innerConsistentFn; + FmgrInfo leafConsistentFn; + + /* Pre-allocated workspace arrays: */ + double *zeroDistances; + double *infDistances; /* These fields are only used in amgetbitmap scans: */ TIDBitmap *tbm; /* bitmap being filled */ @@ -161,7 +192,10 @@ typedef struct SpGistScanOpaqueData int iPtr; /* index for scanning through same */ ItemPointerData heapPtrs[MaxIndexTuplesPerPage]; /* TIDs from cur page */ bool recheck[MaxIndexTuplesPerPage]; /* their recheck flags */ + bool recheckDistances[MaxIndexTuplesPerPage]; /* distance recheck + * flags */ HeapTuple reconTups[MaxIndexTuplesPerPage]; /* reconstructed tuples */ + double *distances[MaxIndexTuplesPerPage]; /* distances (for recheck) */ /* * Note: using MaxIndexTuplesPerPage above is a bit hokey since @@ -410,6 +444,9 @@ extern OffsetNumber SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size, OffsetNumber *startOffset, bool errorOK); +extern bool spgproperty(Oid index_oid, int attno, + IndexAMProperty prop, const char *propname, + bool *res, bool *isnull); /* spgdoinsert.c */ extern void spgUpdateNodeLink(SpGistInnerTuple tup, int nodeN, @@ -421,4 +458,9 @@ extern void spgPageIndexMultiDelete(SpGistState *state, Page page, extern bool spgdoinsert(Relation index, SpGistState *state, ItemPointer heapPtr, Datum datum, bool isnull); +/* spgproc.c */ +extern double *spg_key_orderbys_distances(Datum key, bool isLeaf, + ScanKey orderbys, int norderbys); +extern BOX *box_copy(BOX *orig); + #endif /* SPGIST_PRIVATE_H */ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 30bf93f7c30..6ab2187949a 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201809181 +#define CATALOG_VERSION_NO 201809191 #endif diff --git a/src/include/catalog/pg_amop.dat b/src/include/catalog/pg_amop.dat index fb58f774b93..5f85e9507ca 100644 --- a/src/include/catalog/pg_amop.dat +++ b/src/include/catalog/pg_amop.dat @@ -1401,6 +1401,10 @@ { amopfamily => 'spgist/quad_point_ops', amoplefttype => 'point', amoprighttype => 'box', amopstrategy => '8', amopopr => '<@(point,box)', amopmethod => 'spgist' }, +{ amopfamily => 'spgist/quad_point_ops', amoplefttype => 'point', + amoprighttype => 'point', amopstrategy => '15', amopopr => '<->(point,point)', + amopmethod => 'spgist', amoppurpose => 'o', + amopsortfamily => 'btree/float_ops' }, # SP-GiST kd_point_ops { amopfamily => 'spgist/kd_point_ops', amoplefttype => 'point', @@ -1421,6 +1425,10 @@ { amopfamily => 'spgist/kd_point_ops', amoplefttype => 'point', amoprighttype => 'box', amopstrategy => '8', amopopr => '<@(point,box)', amopmethod => 'spgist' }, +{ amopfamily => 'spgist/kd_point_ops', amoplefttype => 'point', + amoprighttype => 'point', amopstrategy => '15', amopopr => '<->(point,point)', + amopmethod => 'spgist', amoppurpose => 'o', + amopsortfamily => 'btree/float_ops' }, # SP-GiST text_ops { amopfamily => 'spgist/text_ops', amoplefttype => 'text', @@ -1590,6 +1598,10 @@ { amopfamily => 'spgist/poly_ops', amoplefttype => 'polygon', amoprighttype => 'polygon', amopstrategy => '12', amopopr => '|&>(polygon,polygon)', amopmethod => 'spgist' }, +{ amopfamily => 'spgist/poly_ops', amoplefttype => 'polygon', + amoprighttype => 'point', amopstrategy => '15', + amopopr => '<->(polygon,point)', amoppurpose => 'o', amopmethod => 'spgist', + amopsortfamily => 'btree/float_ops' }, # GiST inet_ops { amopfamily => 'gist/network_ops', amoplefttype => 'inet', diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index e55ea4035b8..e0eea2a0dc5 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -95,6 +95,8 @@ extern char *get_constraint_name(Oid conoid); extern char *get_language_name(Oid langoid, bool missing_ok); extern Oid get_opclass_family(Oid opclass); extern Oid get_opclass_input_type(Oid opclass); +extern bool get_opclass_opfamily_and_input_type(Oid opclass, + Oid *opfamily, Oid *opcintype); extern RegProcedure get_opcode(Oid opno); extern char *get_opname(Oid opno); extern Oid get_op_rettype(Oid opno); @@ -176,6 +178,7 @@ extern void free_attstatsslot(AttStatsSlot *sslot); extern char *get_namespace_name(Oid nspid); extern char *get_namespace_name_or_temp(Oid nspid); extern Oid get_range_subtype(Oid rangeOid); +extern Oid get_index_column_opclass(Oid index_oid, int attno); #define type_is_array(typid) (get_element_type(typid) != InvalidOid) /* type_is_array_domain accepts both plain arrays and domains over arrays */ -- cgit v1.2.3