diff options
| author | Heikki Linnakangas | 2014-03-12 15:13:22 +0000 |
|---|---|---|
| committer | Heikki Linnakangas | 2014-03-12 15:51:30 +0000 |
| commit | c5608ea26a1f51998ad3cf987c3f0bda643c87a8 (patch) | |
| tree | ea7370dc9696b18454f7102b3c26208455355d3c /src/backend | |
| parent | fecfc2b913c4be5eeed24b32ef51a3239580bd1e (diff) | |
Allow opclasses to provide tri-valued GIN consistent functions.
With the GIN "fast scan" feature, GIN can skip items without fetching all
the keys for them, if it can prove that they don't match regardless of
those keys. So far, it has done the proving by calling the boolean
consistent function with all combinations of TRUE/FALSE for the unfetched
keys, but since that's O(2^n), it becomes infeasible with more than a few
keys. We can avoid calling consistent with all the combinations, if we can
tell the operator class implementation directly which keys are unknown.
This commit includes a triConsistent function for the built-in array and
tsvector opclasses.
Alexander Korotkov, with some changes by me.
Diffstat (limited to 'src/backend')
| -rw-r--r-- | src/backend/access/gin/ginarrayproc.c | 84 | ||||
| -rw-r--r-- | src/backend/access/gin/ginlogic.c | 68 | ||||
| -rw-r--r-- | src/backend/access/gin/ginutil.c | 28 | ||||
| -rw-r--r-- | src/backend/utils/adt/tsginidx.c | 104 |
4 files changed, 273 insertions, 11 deletions
diff --git a/src/backend/access/gin/ginarrayproc.c b/src/backend/access/gin/ginarrayproc.c index e02a91b82df..d04b851e3cf 100644 --- a/src/backend/access/gin/ginarrayproc.c +++ b/src/backend/access/gin/ginarrayproc.c @@ -218,3 +218,87 @@ ginarrayconsistent(PG_FUNCTION_ARGS) PG_RETURN_BOOL(res); } + +/* + * triconsistent support function: check[] entries and the result are ternary GinLogicValues (GIN_TRUE, GIN_FALSE, GIN_MAYBE) + */ +Datum +ginarraytriconsistent(PG_FUNCTION_ARGS) +{ + GinLogicValue *check = (GinLogicValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* ArrayType *query = PG_GETARG_ARRAYTYPE_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + + /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */ + /* Datum *queryKeys = (Datum *) PG_GETARG_POINTER(5); */ + bool *nullFlags = (bool *) PG_GETARG_POINTER(6); + GinLogicValue res; + int32 i; + + switch (strategy) + { + case GinOverlapStrategy: + /* must have a match for at least one non-null element */ + res = GIN_FALSE; + for (i = 0; i < nkeys; i++) + { + if (!nullFlags[i]) + { + if (check[i] == GIN_TRUE) + { + res = GIN_TRUE; + break; + } + else if (check[i] == GIN_MAYBE && res == GIN_FALSE) + { + res = GIN_MAYBE; + } + } + } + break; + case GinContainsStrategy: + /* must have all elements in check[] true, and no nulls */ + res = GIN_TRUE; + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE || nullFlags[i]) + { + res = GIN_FALSE; + break; + } + if (check[i] == GIN_MAYBE) + { + res = GIN_MAYBE; + } + } + break; + case GinContainedStrategy: + /* can't do anything else useful here */ + res = GIN_MAYBE; + break; + case GinEqualStrategy: + /* + * Must have all elements in check[] true; no discrimination + * against nulls here. This is because array_contain_compare and + * array_eq handle nulls differently ... 
+ */ + res = GIN_MAYBE; + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + } + break; + default: + elog(ERROR, "ginarraytriconsistent: unknown strategy number: %d", + strategy); + res = GIN_FALSE; + } + + PG_RETURN_GIN_LOGIC_VALUE(res); +} diff --git a/src/backend/access/gin/ginlogic.c b/src/backend/access/gin/ginlogic.c index dc8e6304a2f..4c8d706361c 100644 --- a/src/backend/access/gin/ginlogic.c +++ b/src/backend/access/gin/ginlogic.c @@ -61,7 +61,7 @@ trueTriConsistentFn(GinScanKey key) * A helper function for calling a regular, binary logic, consistent function. */ static bool -normalBoolConsistentFn(GinScanKey key) +directBoolConsistentFn(GinScanKey key) { /* * Initialize recheckCurItem in case the consistentFn doesn't know it @@ -82,6 +82,53 @@ normalBoolConsistentFn(GinScanKey key) } /* + * A helper function for calling a native ternary logic consistent function. + */ +static GinLogicValue +directTriConsistentFn(GinScanKey key) +{ + return DatumGetGinLogicValue(FunctionCall7Coll(key->triConsistentFmgrInfo, + key->collation, + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + UInt32GetDatum(key->nuserentries), + PointerGetDatum(key->extra_data), + PointerGetDatum(key->queryValues), + PointerGetDatum(key->queryCategories))); +} + +/* + * This function implements a binary logic consistency check, using a ternary + * logic consistent function provided by the opclass. GIN_MAYBE return value + * is interpreted as true with recheck flag. 
+ */ +static bool +shimBoolConsistentFn(GinScanKey key) +{ + GinLogicValue result; + result = DatumGetGinLogicValue(FunctionCall7Coll(key->triConsistentFmgrInfo, + key->collation, + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + UInt32GetDatum(key->nuserentries), + PointerGetDatum(key->extra_data), + PointerGetDatum(key->queryValues), + PointerGetDatum(key->queryCategories))); + if (result == GIN_MAYBE) + { + key->recheckCurItem = true; + return true; + } + else + { + key->recheckCurItem = false; + return result; + } +} + +/* * This function implements a tri-state consistency check, using a boolean * consistent function provided by the opclass. * @@ -124,12 +171,12 @@ shimTriConsistentFn(GinScanKey key) * function as is. */ if (nmaybe == 0) - return normalBoolConsistentFn(key); + return directBoolConsistentFn(key); /* First call consistent function with all the maybe-inputs set FALSE */ for (i = 0; i < nmaybe; i++) key->entryRes[maybeEntries[i]] = GIN_FALSE; - curResult = normalBoolConsistentFn(key); + curResult = directBoolConsistentFn(key); for (;;) { @@ -147,7 +194,7 @@ shimTriConsistentFn(GinScanKey key) if (i == nmaybe) break; - boolResult = normalBoolConsistentFn(key); + boolResult = directBoolConsistentFn(key); recheck |= key->recheckCurItem; if (curResult != boolResult) @@ -175,8 +222,17 @@ ginInitConsistentFunction(GinState *ginstate, GinScanKey key) else { key->consistentFmgrInfo = &ginstate->consistentFn[key->attnum - 1]; + key->triConsistentFmgrInfo = &ginstate->triConsistentFn[key->attnum - 1]; key->collation = ginstate->supportCollation[key->attnum - 1]; - key->boolConsistentFn = normalBoolConsistentFn; - key->triConsistentFn = shimTriConsistentFn; + + if (OidIsValid(ginstate->consistentFn[key->attnum - 1].fn_oid)) + key->boolConsistentFn = directBoolConsistentFn; + else + key->boolConsistentFn = shimBoolConsistentFn; + + if (OidIsValid(ginstate->triConsistentFn[key->attnum - 1].fn_oid)) + key->triConsistentFn = 
directTriConsistentFn; + else + key->triConsistentFn = shimTriConsistentFn; } } diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 486f2ef1f11..4dadb50dcaa 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -67,9 +67,31 @@ initGinState(GinState *state, Relation index) fmgr_info_copy(&(state->extractQueryFn[i]), index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC), CurrentMemoryContext); - fmgr_info_copy(&(state->consistentFn[i]), - index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC), - CurrentMemoryContext); + /* + * Check opclass capability to do tri-state or binary logic consistent + * check. + */ + if (index_getprocid(index, i + 1, GIN_TRICONSISTENT_PROC) != InvalidOid) + { + fmgr_info_copy(&(state->triConsistentFn[i]), + index_getprocinfo(index, i + 1, GIN_TRICONSISTENT_PROC), + CurrentMemoryContext); + } + + if (index_getprocid(index, i + 1, GIN_CONSISTENT_PROC) != InvalidOid) + { + fmgr_info_copy(&(state->consistentFn[i]), + index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC), + CurrentMemoryContext); + } + + if (state->consistentFn[i].fn_oid == InvalidOid && + state->triConsistentFn[i].fn_oid == InvalidOid) + { + elog(ERROR, "missing GIN support function (%d or %d) for attribute %d of index \"%s\"", + GIN_CONSISTENT_PROC, GIN_TRICONSISTENT_PROC, + i + 1, RelationGetRelationName(index)); + } /* * Check opclass capability to do partial match. 
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index 9f6e8e9fd57..47685e8b930 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -15,6 +15,7 @@ #include "access/gin.h" #include "access/skey.h" +#include "miscadmin.h" #include "tsearch/ts_type.h" #include "tsearch/ts_utils.h" #include "utils/builtins.h" @@ -172,12 +173,12 @@ gin_extract_tsquery(PG_FUNCTION_ARGS) typedef struct { QueryItem *first_item; - bool *check; + GinLogicValue *check; int *map_item_operand; bool *need_recheck; } GinChkVal; -static bool +static GinLogicValue checkcondition_gin(void *checkval, QueryOperand *val) { GinChkVal *gcv = (GinChkVal *) checkval; @@ -194,6 +195,66 @@ checkcondition_gin(void *checkval, QueryOperand *val) return gcv->check[j]; } +/* + * Evaluate tsquery boolean expression using ternary logic. + * + * chkcond is a callback function used to evaluate each VAL node in the query. + * checkval can be used to pass information to the callback. TS_execute_ternary doesn't + * do anything with it. 
+ */ +static GinLogicValue +TS_execute_ternary(QueryItem *curitem, void *checkval, + GinLogicValue (*chkcond) (void *checkval, QueryOperand *val)) +{ + GinLogicValue val1, val2, result; + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + if (curitem->type == QI_VAL) + return chkcond(checkval, (QueryOperand *) curitem); + + switch (curitem->qoperator.oper) + { + case OP_NOT: + result = TS_execute_ternary(curitem + 1, checkval, chkcond); + if (result == GIN_MAYBE) + return result; + return (result == GIN_TRUE) ? GIN_FALSE : GIN_TRUE; + + case OP_AND: + val1 = TS_execute_ternary(curitem + curitem->qoperator.left, + checkval, chkcond); + if (val1 == GIN_FALSE) + return GIN_FALSE; + val2 = TS_execute_ternary(curitem + 1, checkval, chkcond); + if (val2 == GIN_FALSE) + return GIN_FALSE; + if (val1 == GIN_TRUE && val2 == GIN_TRUE) + return GIN_TRUE; + else + return GIN_MAYBE; + + case OP_OR: + val1 = TS_execute_ternary(curitem + curitem->qoperator.left, + checkval, chkcond); + if (val1 == GIN_TRUE) + return GIN_TRUE; + val2 = TS_execute_ternary(curitem + 1, checkval, chkcond); + if (val2 == GIN_TRUE) + return GIN_TRUE; + if (val1 == GIN_FALSE && val2 == GIN_FALSE) + return GIN_FALSE; + else + return GIN_MAYBE; + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); + } + + /* not reachable, but keep compiler quiet */ + return GIN_FALSE; +} + Datum gin_tsquery_consistent(PG_FUNCTION_ARGS) { @@ -233,6 +294,45 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS) PG_RETURN_BOOL(res); } +Datum +gin_tsquery_triconsistent(PG_FUNCTION_ARGS) +{ + GinLogicValue *check = (GinLogicValue *) PG_GETARG_POINTER(0); + + /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ + TSQuery query = PG_GETARG_TSQUERY(2); + + /* int32 nkeys = PG_GETARG_INT32(3); */ + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + GinLogicValue res = GIN_FALSE; + bool recheck = false; + + /* The query requires recheck only if it involves weights; 
recheck starts out false in case the callback never sets it */ + if (query->size > 0) + { + QueryItem 
*item; + GinChkVal gcv; + + /* + * check-parameter array has one entry for each value (operand) in the + * query. + */ + gcv.first_item = item = GETQUERY(query); + gcv.check = check; + gcv.map_item_operand = (int *) (extra_data[0]); + gcv.need_recheck = &recheck; + + res = TS_execute_ternary(item, + &gcv, + checkcondition_gin); + + if (res == GIN_TRUE && recheck) + res = GIN_MAYBE; + } + + PG_RETURN_GIN_LOGIC_VALUE(res); +} + /* * Formerly, gin_extract_tsvector had only two arguments. Now it has three, * but we still need a pg_proc entry with two args to support reloading |
