summaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
author Peter Geoghegan 2020-02-26 19:28:25 +0000
committer Peter Geoghegan 2020-02-26 19:28:25 +0000
commit 612a1ab76724aa1514b6509269342649f8cab375 (patch)
tree df13756515fd71d6528958f2315123d89d41b817 /src/backend
parent 4109bb5de4998b9301ea2ac18c9d6dfb0b4f900b (diff)
Add equalimage B-Tree support functions.
Invent the concept of a B-Tree equalimage ("equality implies image equality") support function, registered as support function 4. This indicates whether it is safe (or not safe) to apply optimizations that assume that any two datums considered equal by an operator class's order method must be interchangeable without any loss of semantic information. This is static information about an operator class and a collation.

Register an equalimage routine for almost all of the existing B-Tree opclasses. We only need two trivial routines for all of the opclasses that are included with the core distribution. There is one routine for opclasses that index non-collatable types (which returns 'true' unconditionally), plus another routine for collatable types (which returns 'true' when the collation is a deterministic collation).

This patch is infrastructure for an upcoming patch that adds B-Tree deduplication.

Author: Peter Geoghegan, Anastasia Lubennikova
Discussion: https://postgr.es/m/CAH2-Wzn3Ee49Gmxb7V1VJ3-AC8fWn-Fr8pfWQebHe8rYRxt5OQ@mail.gmail.com
Diffstat (limited to 'src/backend')
-rw-r--r-- src/backend/access/nbtree/nbtutils.c 73
-rw-r--r-- src/backend/access/nbtree/nbtvalidate.c 8
-rw-r--r-- src/backend/commands/opclasscmds.c 30
-rw-r--r-- src/backend/utils/adt/datum.c 26
-rw-r--r-- src/backend/utils/adt/varlena.c 20
5 files changed, 152 insertions, 5 deletions
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 5ab4e712f12..af07732eabc 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -20,6 +20,7 @@
#include "access/nbtree.h"
#include "access/reloptions.h"
#include "access/relscan.h"
+#include "catalog/catalog.h"
#include "commands/progress.h"
#include "lib/qunique.h"
#include "miscadmin.h"
@@ -2566,3 +2567,75 @@ _bt_check_third_page(Relation rel, Relation heap, bool needheaptidspace,
"or use full text indexing."),
errtableconstraint(heap, RelationGetRelationName(rel))));
}
+
+/*
+ * Are all attributes in rel "equality is image equality" attributes?
+ *
+ * We use each attribute's BTEQUALIMAGE_PROC opclass procedure. If any
+ * opclass either lacks a BTEQUALIMAGE_PROC procedure or returns false, we
+ * return false; otherwise we return true. debugmessage requests a DEBUG1
+ * report of the verdict (never issued for the early "return false" cases).
+ * Returned boolean value is stored in index metapage during index builds.
+ * Deduplication can only be used when we return true.
+ */
+bool
+_bt_allequalimage(Relation rel, bool debugmessage)
+{
+ bool allequalimage = true; /* cleared by the first unsafe attribute */
+
+ /* INCLUDE indexes don't support deduplication */
+ if (IndexRelationGetNumberOfAttributes(rel) !=
+ IndexRelationGetNumberOfKeyAttributes(rel))
+ return false;
+
+ /*
+ * There is no special reason why deduplication cannot work with system
+ * relations (i.e. with system catalog indexes and TOAST indexes). We
+ * deem deduplication unsafe for these indexes all the same, since the
+ * alternative is to force users to always use deduplication, without
+ * being able to opt out. (ALTER INDEX is not supported with system
+ * indexes, so users would have no way to set the deduplicate_items
+ * storage parameter to 'off'.)
+ */
+ if (IsSystemRelation(rel))
+ return false;
+
+ for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(rel); i++)
+ {
+ Oid opfamily = rel->rd_opfamily[i];
+ Oid opcintype = rel->rd_opcintype[i];
+ Oid collation = rel->rd_indcollation[i];
+ Oid equalimageproc;
+
+ equalimageproc = get_opfamily_proc(opfamily, opcintype, opcintype,
+ BTEQUALIMAGE_PROC); /* opcintype is used as both left and right type */
+
+ /*
+ * If there is no BTEQUALIMAGE_PROC then deduplication is assumed to
+ * be unsafe. Otherwise, actually call proc and see what it says.
+ */
+ if (!OidIsValid(equalimageproc) ||
+ !DatumGetBool(OidFunctionCall1Coll(equalimageproc, collation,
+ ObjectIdGetDatum(opcintype)))) /* called under the attribute's collation; opcintype is the sole argument */
+ {
+ allequalimage = false;
+ break; /* no need to check the remaining attributes */
+ }
+ }
+
+ /*
+ * Don't elog() until here to avoid reporting on a system relation index
+ * or an INCLUDE index
+ */
+ if (debugmessage)
+ {
+ if (allequalimage)
+ elog(DEBUG1, "index \"%s\" can safely use deduplication",
+ RelationGetRelationName(rel));
+ else
+ elog(DEBUG1, "index \"%s\" cannot use deduplication",
+ RelationGetRelationName(rel));
+ }
+
+ return allequalimage;
+}
diff --git a/src/backend/access/nbtree/nbtvalidate.c b/src/backend/access/nbtree/nbtvalidate.c
index ff634b16499..627f74407a3 100644
--- a/src/backend/access/nbtree/nbtvalidate.c
+++ b/src/backend/access/nbtree/nbtvalidate.c
@@ -104,6 +104,10 @@ btvalidate(Oid opclassoid)
procform->amprocrighttype,
BOOLOID, BOOLOID);
break;
+ case BTEQUALIMAGE_PROC:
+ ok = check_amproc_signature(procform->amproc, BOOLOID, true,
+ 1, 1, OIDOID);
+ break;
default:
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -211,8 +215,8 @@ btvalidate(Oid opclassoid)
/*
* Complain if there seems to be an incomplete set of either operators
- * or support functions for this datatype pair. The only things
- * considered optional are the sortsupport and in_range functions.
+ * or support functions for this datatype pair. The sortsupport,
+ * in_range, and equalimage functions are considered optional.
*/
if (thisgroup->operatorset !=
((1 << BTLessStrategyNumber) |
diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c
index e2c6de457cf..743511bdf21 100644
--- a/src/backend/commands/opclasscmds.c
+++ b/src/backend/commands/opclasscmds.c
@@ -1143,9 +1143,10 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
/*
* btree comparison procs must be 2-arg procs returning int4. btree
* sortsupport procs must take internal and return void. btree in_range
- * procs must be 5-arg procs returning bool. hash support proc 1 must be
- * a 1-arg proc returning int4, while proc 2 must be a 2-arg proc
- * returning int8. Otherwise we don't know.
+ * procs must be 5-arg procs returning bool. btree equalimage procs must
+ * take 1 arg and return bool. hash support proc 1 must be a 1-arg proc
+ * returning int4, while proc 2 must be a 2-arg proc returning int8.
+ * Otherwise we don't know.
*/
if (amoid == BTREE_AM_OID)
{
@@ -1205,6 +1206,29 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
if (!OidIsValid(member->righttype))
member->righttype = procform->proargtypes.values[2];
}
+ else if (member->number == BTEQUALIMAGE_PROC)
+ {
+ if (procform->pronargs != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("btree equal image functions must have one argument")));
+ if (procform->prorettype != BOOLOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("btree equal image functions must return boolean")));
+ /*
+ * pg_amproc functions are indexed by (lefttype, righttype), but
+ * an equalimage function can only be called at CREATE INDEX time.
+ * The same opclass opcintype OID is always used for lefttype and
+ * righttype. Providing a cross-type routine isn't sensible.
+ * Reject cross-type ALTER OPERATOR FAMILY ... ADD FUNCTION 4
+ * statements here.
+ */
+ if (member->lefttype != member->righttype)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("btree equal image functions must not be cross-type")));
+ }
}
else if (amoid == HASH_AM_OID)
{
diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c
index 4e819473520..34cdde1bb91 100644
--- a/src/backend/utils/adt/datum.c
+++ b/src/backend/utils/adt/datum.c
@@ -44,6 +44,7 @@
#include "access/detoast.h"
#include "fmgr.h"
+#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/expandeddatum.h"
@@ -324,6 +325,31 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
}
/*-------------------------------------------------------------------------
+ * btequalimage
+ *
+ * Generic "equalimage" support function.
+ *
+ * B-Tree operator classes whose equality function could safely be replaced by
+ * datum_image_eq() in all cases can use this as their "equalimage" support
+ * function.
+ *
+ * Currently, we unconditionally assume that any B-Tree operator class that
+ * registers btequalimage as its support function 4 must be able to safely use
+ * optimizations like deduplication (i.e. we return true unconditionally). If
+ * it ever proved necessary to rescind support for an operator class, we could
+ * do that in a targeted fashion by doing something with the opcintype
+ * argument (the function's only argument, an OID, which is ignored today).
+ *-------------------------------------------------------------------------
+ */
+Datum
+btequalimage(PG_FUNCTION_ARGS)
+{
+ /* Oid opcintype = PG_GETARG_OID(0); */
+
+ PG_RETURN_BOOL(true); /* opcintype is not consulted */
+}
+
+/*-------------------------------------------------------------------------
* datumEstimateSpace
*
* Compute the amount of space that datumSerialize will require for a
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 1b351cbc688..875b02d6439 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -2783,6 +2783,26 @@ varstr_abbrev_abort(int memtupcount, SortSupport ssup)
return true;
}
+/*
+ * Generic equalimage support function for character type's operator classes.
+ * Disables the use of deduplication with nondeterministic collations: returns
+ * true only if one of the three collation tests in the body passes.
+ */
+Datum
+btvarstrequalimage(PG_FUNCTION_ARGS)
+{
+ /* Oid opcintype = PG_GETARG_OID(0); */
+ Oid collid = PG_GET_COLLATION(); /* collation supplied with this call */
+
+ check_collation_set(collid); /* presumably errors out when no collation was supplied -- confirm */
+
+ if (lc_collate_is_c(collid) || /* accepted per lc_collate_is_c() */
+ collid == DEFAULT_COLLATION_OID || /* default collation is accepted without a catalog check */
+ get_collation_isdeterministic(collid)) /* only evaluated when both tests above fail */
+ PG_RETURN_BOOL(true);
+ else
+ PG_RETURN_BOOL(false);
+}
+
Datum
text_larger(PG_FUNCTION_ARGS)
{