summaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorTeodor Sigaev2018-04-03 16:46:45 +0000
committerTeodor Sigaev2018-04-03 16:46:45 +0000
commit710d90da1fd8c1d028215ecaf7402062079e99e9 (patch)
tree6fb5d284edb7a9c37e3ae633c92779a65d1b97c6 /src/backend
parent4ab299981576ca0f3dbb879b5e2b704e743d87f3 (diff)
Add prefix operator for TEXT type.
The prefix operator along with SP-GiST indexes can be used as an alternative for LIKE 'word%' commands and it doesn't have a limitation of string/prefix length as B-Tree has. Bump catalog version Author: Ildus Kurbangaliev with some editorization by me Review by: Arthur Zakirov, Alexander Korotkov, and me Discussion: https://www.postgresql.org/message-id/flat/20180202180327.222b04b3@wp.localdomain
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/spgist/spgtextproc.c43
-rw-r--r--src/backend/utils/adt/selfuncs.c33
-rw-r--r--src/backend/utils/adt/varlena.c28
3 files changed, 100 insertions, 4 deletions
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index f156b2166e6..76c0305695b 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -67,6 +67,20 @@
*/
#define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32)
+/*
+ * Strategy for collation aware operator on text is equal to btree strategy
+ * plus value of 10.
+ *
+ * Current collation aware strategies and their corresponding btree strategies:
+ * 11 BTLessStrategyNumber
+ * 12 BTLessEqualStrategyNumber
+ * 14 BTGreaterEqualStrategyNumber
+ * 15 BTGreaterStrategyNumber
+ */
+#define SPG_STRATEGY_ADDITION (10)
+#define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
+ && (s) != RTPrefixStrategyNumber)
+
/* Struct for sorting values in picksplit */
typedef struct spgNodePtr
{
@@ -496,10 +510,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
* well end with a partial multibyte character, so that applying
* any encoding-sensitive test to it would be risky anyhow.)
*/
- if (strategy > 10)
+ if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
{
if (collate_is_c)
- strategy -= 10;
+ strategy -= SPG_STRATEGY_ADDITION;
else
continue;
}
@@ -526,6 +540,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
if (r < 0)
res = false;
break;
+ case RTPrefixStrategyNumber:
+ if (r != 0)
+ res = false;
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d",
in->scankeys[j].sk_strategy);
@@ -605,10 +623,27 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
int queryLen = VARSIZE_ANY_EXHDR(query);
int r;
- if (strategy > 10)
+ if (strategy == RTPrefixStrategyNumber)
+ {
+ /*
+ * if level >= length of query then reconstrValue is began with
+ * query (prefix) string and we don't need to check it again.
+ */
+
+ res = (level >= queryLen) ||
+ DatumGetBool(DirectFunctionCall2(text_starts_with,
+ out->leafValue, PointerGetDatum(query)));
+
+ if (!res) /* no need to consider remaining conditions */
+ break;
+
+ continue;
+ }
+
+ if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
{
/* Collation-aware comparison */
- strategy -= 10;
+ strategy -= SPG_STRATEGY_ADDITION;
/* If asserts enabled, verify encoding of reconstructed string */
Assert(pg_verifymbstr(fullValue, fullLen, false));
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index bf240aa9c5a..f998d859c1c 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1488,6 +1488,16 @@ likesel(PG_FUNCTION_ARGS)
}
/*
+ * prefixsel - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ *
* iclikesel - Selectivity of ILIKE pattern match.
*/
Datum
@@ -2907,6 +2917,15 @@ likejoinsel(PG_FUNCTION_ARGS)
}
/*
+ * prefixjoinsel - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
* iclikejoinsel - Join selectivity of ILIKE pattern match.
*/
Datum
@@ -5947,6 +5966,20 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
result = regex_fixed_prefix(patt, true, collation,
prefix, rest_selec);
break;
+ case Pattern_Type_Prefix:
+ /* Prefix type work is trivial. */
+ result = Pattern_Prefix_Partial;
+ *rest_selec = 1.0; /* all */
+ *prefix = makeConst(patt->consttype,
+ patt->consttypmod,
+ patt->constcollid,
+ patt->constlen,
+ datumCopy(patt->constvalue,
+ patt->constbyval,
+ patt->constlen),
+ patt->constisnull,
+ patt->constbyval);
+ break;
default:
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
result = Pattern_Prefix_None; /* keep compiler quiet */
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4346410d5a9..e8500b274dc 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1762,6 +1762,34 @@ text_ge(PG_FUNCTION_ARGS)
}
Datum
+text_starts_with(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len2 > len1)
+ result = false;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ VARSIZE_ANY_EXHDR(targ2)) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
bttextcmp(PG_FUNCTION_ARGS)
{
text *arg1 = PG_GETARG_TEXT_PP(0);