From 3a0d473192b2045cbaf997df8437e7762d34f3ba Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Sun, 12 Mar 2017 19:35:34 -0400 Subject: Use wrappers of PG_DETOAST_DATUM_PACKED() more. This makes almost all core code follow the policy introduced in the previous commit. Specific decisions: - Text search support functions with char* and length arguments, such as prsstart and lexize, may receive unaligned strings. I doubt maintainers of non-core text search code will notice. - Use plain VARDATA() on values detoasted or synthesized earlier in the same function. Use VARDATA_ANY() on varlenas sourced outside the function, even if they happen to always have four-byte headers. As an exception, retain the universal practice of using VARDATA() on return values of SendFunctionCall(). - Retain PG_GETARG_BYTEA_P() in pageinspect. (Page images are too large for a one-byte header, so this misses no optimization.) Sites that do not call get_page_from_raw() typically need the four-byte alignment. - For now, do not change btree_gist. Its use of four-byte headers in memory is partly entangled with storage of 4-byte headers inside GBT_VARKEY, on disk. - For now, do not change gtrgm_consistent() or gtrgm_distance(). They incorporate the varlena header into a cache, and there are multiple credible implementation strategies to consider. --- contrib/pg_trgm/trgm_gin.c | 15 ++++++++------- contrib/pg_trgm/trgm_gist.c | 17 +++++++++-------- contrib/pg_trgm/trgm_op.c | 12 ++++++------ 3 files changed, 23 insertions(+), 21 deletions(-) (limited to 'contrib/pg_trgm') diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c index ead33ef5447..e4b3daea446 100644 --- a/contrib/pg_trgm/trgm_gin.c +++ b/contrib/pg_trgm/trgm_gin.c @@ -35,7 +35,7 @@ gin_extract_trgm(PG_FUNCTION_ARGS) Datum gin_extract_value_trgm(PG_FUNCTION_ARGS) { - text *val = (text *) PG_GETARG_TEXT_P(0); + text *val = (text *) PG_GETARG_TEXT_PP(0); int32 *nentries = (int32 *) PG_GETARG_POINTER(1); Datum *entries = NULL; TRGM *trg; @@ -43,7 +43,7 @@ gin_extract_value_trgm(PG_FUNCTION_ARGS) *nentries = 0; - trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ); + trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); trglen = ARRNELEM(trg); if (trglen > 0) @@ -70,7 +70,7 @@ gin_extract_value_trgm(PG_FUNCTION_ARGS) Datum gin_extract_query_trgm(PG_FUNCTION_ARGS) { - text *val = (text *) PG_GETARG_TEXT_P(0); + text *val = (text *) PG_GETARG_TEXT_PP(0); int32 *nentries = (int32 *) PG_GETARG_POINTER(1); StrategyNumber strategy = PG_GETARG_UINT16(2); @@ -90,7 +90,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS) { case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: - trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ); + trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); break; case ILikeStrategyNumber: #ifndef IGNORECASE @@ -103,7 +103,8 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS) * For wildcard search we extract all the trigrams that every * potentially-matching string must include. */ - trg = generate_wildcard_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ); + trg = generate_wildcard_trgm(VARDATA_ANY(val), + VARSIZE_ANY_EXHDR(val)); break; case RegExpICaseStrategyNumber: #ifndef IGNORECASE @@ -170,7 +171,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS) bool *check = (bool *) PG_GETARG_POINTER(0); StrategyNumber strategy = PG_GETARG_UINT16(1); - /* text *query = PG_GETARG_TEXT_P(2); */ + /* text *query = PG_GETARG_TEXT_PP(2); */ int32 nkeys = PG_GETARG_INT32(3); Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); bool *recheck = (bool *) PG_GETARG_POINTER(5); @@ -268,7 +269,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS) GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); StrategyNumber strategy = PG_GETARG_UINT16(1); - /* text *query = PG_GETARG_TEXT_P(2); */ + /* text *query = PG_GETARG_TEXT_PP(2); */ int32 nkeys = PG_GETARG_INT32(3); Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); GinTernaryValue res = GIN_MAYBE; diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c index f52867df324..ed02af875ca 100644 --- a/contrib/pg_trgm/trgm_gist.c +++ b/contrib/pg_trgm/trgm_gist.c @@ -100,9 +100,9 @@ gtrgm_compress(PG_FUNCTION_ARGS) if (entry->leafkey) { /* trgm */ TRGM *res; - text *val = DatumGetTextP(entry->key); + text *val = DatumGetTextPP(entry->key); - res = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ); + res = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); gistentryinit(*retval, PointerGetDatum(res), entry->rel, entry->page, @@ -142,7 +142,7 @@ gtrgm_decompress(PG_FUNCTION_ARGS) GISTENTRY *retval; text *key; - key = DatumGetTextP(entry->key); + key = DatumGetTextPP(entry->key); if (key != (text *) DatumGetPointer(entry->key)) { @@ -200,11 +200,12 @@ gtrgm_consistent(PG_FUNCTION_ARGS) * depends on strategy. * * The cached structure is a single palloc chunk containing the - * gtrgm_consistent_cache header, then the input query (starting at a - * MAXALIGN boundary), then the TRGM value (also starting at a MAXALIGN - * boundary). However we don't try to include the regex graph (if any) in - * that struct. (XXX currently, this approach can leak regex graphs - * across index rescans. Not clear if that's worth fixing.) + * gtrgm_consistent_cache header, then the input query (4-byte length + * word, uncompressed, starting at a MAXALIGN boundary), then the TRGM + * value (also starting at a MAXALIGN boundary). However we don't try to + * include the regex graph (if any) in that struct. (XXX currently, this + * approach can leak regex graphs across index rescans. Not clear if + * that's worth fixing.) */ cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra; if (cache == NULL || diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c index 368e7c8941d..e9a713113ed 100644 --- a/contrib/pg_trgm/trgm_op.c +++ b/contrib/pg_trgm/trgm_op.c @@ -878,14 +878,14 @@ trgm2int(trgm *ptr) Datum show_trgm(PG_FUNCTION_ARGS) { - text *in = PG_GETARG_TEXT_P(0); + text *in = PG_GETARG_TEXT_PP(0); TRGM *trg; Datum *d; ArrayType *a; trgm *ptr; int i; - trg = generate_trgm(VARDATA(in), VARSIZE(in) - VARHDRSZ); + trg = generate_trgm(VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in)); d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg))); for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++) @@ -1053,14 +1053,14 @@ trgm_presence_map(TRGM *query, TRGM *key) Datum similarity(PG_FUNCTION_ARGS) { - text *in1 = PG_GETARG_TEXT_P(0); - text *in2 = PG_GETARG_TEXT_P(1); + text *in1 = PG_GETARG_TEXT_PP(0); + text *in2 = PG_GETARG_TEXT_PP(1); TRGM *trg1, *trg2; float4 res; - trg1 = generate_trgm(VARDATA(in1), VARSIZE(in1) - VARHDRSZ); - trg2 = generate_trgm(VARDATA(in2), VARSIZE(in2) - VARHDRSZ); + trg1 = generate_trgm(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1)); + trg2 = generate_trgm(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2)); res = cnt_sml(trg1, trg2, false); -- cgit v1.2.3