diff options
| author | Peter Geoghegan | 2020-02-26 19:28:25 +0000 |
|---|---|---|
| committer | Peter Geoghegan | 2020-02-26 19:28:25 +0000 |
| commit | 612a1ab76724aa1514b6509269342649f8cab375 (patch) | |
| tree | df13756515fd71d6528958f2315123d89d41b817 /src/backend | |
| parent | 4109bb5de4998b9301ea2ac18c9d6dfb0b4f900b (diff) | |
Add equalimage B-Tree support functions.
Invent the concept of a B-Tree equalimage ("equality implies image
equality") support function, registered as support function 4. This
indicates whether it is safe (or not safe) to apply optimizations that
assume that any two datums considered equal by an operator class's order
method must be interchangeable without any loss of semantic information.
This is static information about an operator class and a collation.
Register an equalimage routine for almost all of the existing B-Tree
opclasses. We only need two trivial routines for all of the opclasses
that are included with the core distribution. There is one routine for
opclasses that index non-collatable types (which returns 'true'
unconditionally), plus another routine for collatable types (which
returns 'true' when the collation is a deterministic collation).
This patch is infrastructure for an upcoming patch that adds B-Tree
deduplication.
Author: Peter Geoghegan, Anastasia Lubennikova
Discussion: https://postgr.es/m/CAH2-Wzn3Ee49Gmxb7V1VJ3-AC8fWn-Fr8pfWQebHe8rYRxt5OQ@mail.gmail.com
Diffstat (limited to 'src/backend')
| -rw-r--r-- | src/backend/access/nbtree/nbtutils.c | 73 | ||||
| -rw-r--r-- | src/backend/access/nbtree/nbtvalidate.c | 8 | ||||
| -rw-r--r-- | src/backend/commands/opclasscmds.c | 30 | ||||
| -rw-r--r-- | src/backend/utils/adt/datum.c | 26 | ||||
| -rw-r--r-- | src/backend/utils/adt/varlena.c | 20 |
5 files changed, 152 insertions, 5 deletions
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 5ab4e712f12..af07732eabc 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -20,6 +20,7 @@ #include "access/nbtree.h" #include "access/reloptions.h" #include "access/relscan.h" +#include "catalog/catalog.h" #include "commands/progress.h" #include "lib/qunique.h" #include "miscadmin.h" @@ -2566,3 +2567,75 @@ _bt_check_third_page(Relation rel, Relation heap, bool needheaptidspace, "or use full text indexing."), errtableconstraint(heap, RelationGetRelationName(rel)))); } + +/* + * Are all attributes in rel "equality is image equality" attributes? + * + * We use each attribute's BTEQUALIMAGE_PROC opclass procedure. If any + * opclass either lacks a BTEQUALIMAGE_PROC procedure or returns false, we + * return false; otherwise we return true. + * + * Returned boolean value is stored in index metapage during index builds. + * Deduplication can only be used when we return true. + */ +bool +_bt_allequalimage(Relation rel, bool debugmessage) +{ + bool allequalimage = true; + + /* INCLUDE indexes don't support deduplication */ + if (IndexRelationGetNumberOfAttributes(rel) != + IndexRelationGetNumberOfKeyAttributes(rel)) + return false; + + /* + * There is no special reason why deduplication cannot work with system + * relations (i.e. with system catalog indexes and TOAST indexes). We + * deem deduplication unsafe for these indexes all the same, since the + * alternative is to force users to always use deduplication, without + * being able to opt out. (ALTER INDEX is not supported with system + * indexes, so users would have no way to set the deduplicate_items + * storage parameter to 'off'.) 
+ */ + if (IsSystemRelation(rel)) + return false; + + for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(rel); i++) + { + Oid opfamily = rel->rd_opfamily[i]; + Oid opcintype = rel->rd_opcintype[i]; + Oid collation = rel->rd_indcollation[i]; + Oid equalimageproc; + + equalimageproc = get_opfamily_proc(opfamily, opcintype, opcintype, + BTEQUALIMAGE_PROC); + + /* + * If there is no BTEQUALIMAGE_PROC then deduplication is assumed to + * be unsafe. Otherwise, actually call proc and see what it says. + */ + if (!OidIsValid(equalimageproc) || + !DatumGetBool(OidFunctionCall1Coll(equalimageproc, collation, + ObjectIdGetDatum(opcintype)))) + { + allequalimage = false; + break; + } + } + + /* + * Don't elog() until here to avoid reporting on a system relation index + * or an INCLUDE index + */ + if (debugmessage) + { + if (allequalimage) + elog(DEBUG1, "index \"%s\" can safely use deduplication", + RelationGetRelationName(rel)); + else + elog(DEBUG1, "index \"%s\" cannot use deduplication", + RelationGetRelationName(rel)); + } + + return allequalimage; +} diff --git a/src/backend/access/nbtree/nbtvalidate.c b/src/backend/access/nbtree/nbtvalidate.c index ff634b16499..627f74407a3 100644 --- a/src/backend/access/nbtree/nbtvalidate.c +++ b/src/backend/access/nbtree/nbtvalidate.c @@ -104,6 +104,10 @@ btvalidate(Oid opclassoid) procform->amprocrighttype, BOOLOID, BOOLOID); break; + case BTEQUALIMAGE_PROC: + ok = check_amproc_signature(procform->amproc, BOOLOID, true, + 1, 1, OIDOID); + break; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -211,8 +215,8 @@ btvalidate(Oid opclassoid) /* * Complain if there seems to be an incomplete set of either operators - * or support functions for this datatype pair. The only things - * considered optional are the sortsupport and in_range functions. + * or support functions for this datatype pair. The sortsupport, + * in_range, and equalimage functions are considered optional. 
*/ if (thisgroup->operatorset != ((1 << BTLessStrategyNumber) | diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c index e2c6de457cf..743511bdf21 100644 --- a/src/backend/commands/opclasscmds.c +++ b/src/backend/commands/opclasscmds.c @@ -1143,9 +1143,10 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid) /* * btree comparison procs must be 2-arg procs returning int4. btree * sortsupport procs must take internal and return void. btree in_range - * procs must be 5-arg procs returning bool. hash support proc 1 must be - * a 1-arg proc returning int4, while proc 2 must be a 2-arg proc - * returning int8. Otherwise we don't know. + * procs must be 5-arg procs returning bool. btree equalimage procs must + * take 1 arg and return bool. hash support proc 1 must be a 1-arg proc + * returning int4, while proc 2 must be a 2-arg proc returning int8. + * Otherwise we don't know. */ if (amoid == BTREE_AM_OID) { @@ -1205,6 +1206,29 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid) if (!OidIsValid(member->righttype)) member->righttype = procform->proargtypes.values[2]; } + else if (member->number == BTEQUALIMAGE_PROC) + { + if (procform->pronargs != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must have one argument"))); + if (procform->prorettype != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must return boolean"))); + /* + * pg_amproc functions are indexed by (lefttype, righttype), but + * an equalimage function can only be called at CREATE INDEX time. + * The same opclass opcintype OID is always used for lefttype and + * righttype. Providing a cross-type routine isn't sensible. + * Reject cross-type ALTER OPERATOR FAMILY ... ADD FUNCTION 4 + * statements here. 
+ */ + if (member->lefttype != member->righttype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must not be cross-type"))); + } } else if (amoid == HASH_AM_OID) { diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c index 4e819473520..34cdde1bb91 100644 --- a/src/backend/utils/adt/datum.c +++ b/src/backend/utils/adt/datum.c @@ -44,6 +44,7 @@ #include "access/detoast.h" #include "fmgr.h" +#include "utils/builtins.h" #include "utils/datum.h" #include "utils/expandeddatum.h" @@ -324,6 +325,31 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) } /*------------------------------------------------------------------------- + * btequalimage + * + * Generic "equalimage" support function. + * + * B-Tree operator classes whose equality function could safely be replaced by + * datum_image_eq() in all cases can use this as their "equalimage" support + * function. + * + * Currently, we unconditionally assume that any B-Tree operator class that + * registers btequalimage as its support function 4 must be able to safely use + * optimizations like deduplication (i.e. we return true unconditionally). If + * it ever proved necessary to rescind support for an operator class, we could + * do that in a targeted fashion by doing something with the opcintype + * argument. 
+ *------------------------------------------------------------------------- + */ +Datum +btequalimage(PG_FUNCTION_ARGS) +{ + /* Oid opcintype = PG_GETARG_OID(0); */ + + PG_RETURN_BOOL(true); +} + +/*------------------------------------------------------------------------- * datumEstimateSpace * * Compute the amount of space that datumSerialize will require for a diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 1b351cbc688..875b02d6439 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -2783,6 +2783,26 @@ varstr_abbrev_abort(int memtupcount, SortSupport ssup) return true; } +/* + * Generic equalimage support function for character type's operator classes. + * Disables the use of deduplication with nondeterministic collations. + */ +Datum +btvarstrequalimage(PG_FUNCTION_ARGS) +{ + /* Oid opcintype = PG_GETARG_OID(0); */ + Oid collid = PG_GET_COLLATION(); + + check_collation_set(collid); + + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + get_collation_isdeterministic(collid)) + PG_RETURN_BOOL(true); + else + PG_RETURN_BOOL(false); +} + Datum text_larger(PG_FUNCTION_ARGS) { |
