diff options
| author | Teodor Sigaev | 2018-04-03 16:46:45 +0000 |
|---|---|---|
| committer | Teodor Sigaev | 2018-04-03 16:46:45 +0000 |
| commit | 710d90da1fd8c1d028215ecaf7402062079e99e9 (patch) | |
| tree | 6fb5d284edb7a9c37e3ae633c92779a65d1b97c6 /src/backend | |
| parent | 4ab299981576ca0f3dbb879b5e2b704e743d87f3 (diff) | |
Add prefix operator for TEXT type.
The prefix operator, together with SP-GiST indexes, can be used as an
alternative to LIKE 'word%' queries, and it does not have the string/prefix
length limitation that B-tree indexes have.
Bump catalog version.
Author: Ildus Kurbangaliev, with some editing by me
Review by: Arthur Zakirov, Alexander Korotkov, and me
Discussion: https://www.postgresql.org/message-id/flat/20180202180327.222b04b3@wp.localdomain
Diffstat (limited to 'src/backend')
| -rw-r--r-- | src/backend/access/spgist/spgtextproc.c | 43 | ||||
| -rw-r--r-- | src/backend/utils/adt/selfuncs.c | 33 | ||||
| -rw-r--r-- | src/backend/utils/adt/varlena.c | 28 |
3 files changed, 100 insertions, 4 deletions
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c index f156b2166e6..76c0305695b 100644 --- a/src/backend/access/spgist/spgtextproc.c +++ b/src/backend/access/spgist/spgtextproc.c @@ -67,6 +67,20 @@ */ #define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32) +/* + * Strategy for collation aware operator on text is equal to btree strategy + * plus value of 10. + * + * Current collation aware strategies and their corresponding btree strategies: + * 11 BTLessStrategyNumber + * 12 BTLessEqualStrategyNumber + * 14 BTGreaterEqualStrategyNumber + * 15 BTGreaterStrategyNumber + */ +#define SPG_STRATEGY_ADDITION (10) +#define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \ + && (s) != RTPrefixStrategyNumber) + /* Struct for sorting values in picksplit */ typedef struct spgNodePtr { @@ -496,10 +510,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS) * well end with a partial multibyte character, so that applying * any encoding-sensitive test to it would be risky anyhow.) */ - if (strategy > 10) + if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy)) { if (collate_is_c) - strategy -= 10; + strategy -= SPG_STRATEGY_ADDITION; else continue; } @@ -526,6 +540,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS) if (r < 0) res = false; break; + case RTPrefixStrategyNumber: + if (r != 0) + res = false; + break; default: elog(ERROR, "unrecognized strategy number: %d", in->scankeys[j].sk_strategy); @@ -605,10 +623,27 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS) int queryLen = VARSIZE_ANY_EXHDR(query); int r; - if (strategy > 10) + if (strategy == RTPrefixStrategyNumber) + { + /* + * if level >= length of query then reconstrValue is began with + * query (prefix) string and we don't need to check it again. 
+ */ + + res = (level >= queryLen) || + DatumGetBool(DirectFunctionCall2(text_starts_with, + out->leafValue, PointerGetDatum(query))); + + if (!res) /* no need to consider remaining conditions */ + break; + + continue; + } + + if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy)) { /* Collation-aware comparison */ - strategy -= 10; + strategy -= SPG_STRATEGY_ADDITION; /* If asserts enabled, verify encoding of reconstructed string */ Assert(pg_verifymbstr(fullValue, fullLen, false)); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index bf240aa9c5a..f998d859c1c 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -1488,6 +1488,16 @@ likesel(PG_FUNCTION_ARGS) } /* + * prefixsel - selectivity of prefix operator + */ +Datum +prefixsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false)); +} + +/* + * * iclikesel - Selectivity of ILIKE pattern match. */ Datum @@ -2907,6 +2917,15 @@ likejoinsel(PG_FUNCTION_ARGS) } /* + * prefixjoinsel - Join selectivity of prefix operator + */ +Datum +prefixjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false)); +} + +/* * iclikejoinsel - Join selectivity of ILIKE pattern match. */ Datum @@ -5947,6 +5966,20 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, result = regex_fixed_prefix(patt, true, collation, prefix, rest_selec); break; + case Pattern_Type_Prefix: + /* Prefix type work is trivial. 
*/ + result = Pattern_Prefix_Partial; + *rest_selec = 1.0; /* all */ + *prefix = makeConst(patt->consttype, + patt->consttypmod, + patt->constcollid, + patt->constlen, + datumCopy(patt->constvalue, + patt->constbyval, + patt->constlen), + patt->constisnull, + patt->constbyval); + break; default: elog(ERROR, "unrecognized ptype: %d", (int) ptype); result = Pattern_Prefix_None; /* keep compiler quiet */ diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 4346410d5a9..e8500b274dc 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1762,6 +1762,34 @@ text_ge(PG_FUNCTION_ARGS) } Datum +text_starts_with(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len2 > len1) + result = false; + else + { + text *targ1 = DatumGetTextPP(arg1); + text *targ2 = DatumGetTextPP(arg2); + + result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2), + VARSIZE_ANY_EXHDR(targ2)) == 0); + + PG_FREE_IF_COPY(targ1, 0); + PG_FREE_IF_COPY(targ2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum bttextcmp(PG_FUNCTION_ARGS) { text *arg1 = PG_GETARG_TEXT_PP(0); |
