summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
authorDavid Rowley2021-04-08 11:51:22 +0000
committerDavid Rowley2021-04-08 11:51:22 +0000
commit50e17ad281b8d1c1b410c9833955bc80fbad4078 (patch)
treefaf07e47e95ceade572aaf2afdca08bc35ed69e7 /src/test
parent1d257577e08d3e598011d6850fd1025858de8c8c (diff)
Speedup ScalarArrayOpExpr evaluation
ScalarArrayOpExprs with "useOr=true" and a set of Consts on the righthand side have traditionally been evaluated by using a linear search over the array. When these arrays contain large numbers of elements then this linear search could become a significant part of execution time. Here we add a new method of evaluating ScalarArrayOpExpr expressions to allow them to be evaluated by first building a hash table containing each element, then on subsequent evaluations, we just probe that hash table to determine if there is a match. The planner is in charge of determining when this optimization is possible and it enables it by setting hashfuncid in the ScalarArrayOpExpr. The executor will only perform the hash table evaluation when the hashfuncid is set. This means that not all cases are optimized. For example CHECK constraints containing an IN clause won't go through the planner, so won't get the hashfuncid set. We could maybe do something about that at some later date. The reason we're not doing it now is from fear that we may slow down cases where the expression is evaluated only once. Those cases can be common, for example, a single row INSERT to a table with a CHECK constraint containing an IN clause. In the planner, we enable this when there are suitable hash functions for the ScalarArrayOpExpr's operator and only when there is at least MIN_ARRAY_SIZE_FOR_HASHED_SAOP elements in the array. The threshold is currently set to 9. Author: James Coleman, David Rowley Reviewed-by: David Rowley, Tomas Vondra, Heikki Linnakangas Discussion: https://postgr.es/m/CAAaqYe8x62+=wn0zvNKCj55tPpg-JBHzhZFFc6ANovdqFw7-dA@mail.gmail.com
Diffstat (limited to 'src/test')
-rw-r--r--src/test/regress/expected/expressions.out118
-rw-r--r--src/test/regress/sql/expressions.sql85
2 files changed, 203 insertions, 0 deletions
diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out
index 05a6eb07b2e..5944dfd5e1a 100644
--- a/src/test/regress/expected/expressions.out
+++ b/src/test/regress/expected/expressions.out
@@ -158,3 +158,121 @@ select count(*) from date_tbl
13
(1 row)
+--
+-- Tests for ScalarArrayOpExpr with a hashfn
+--
+-- create a stable function so that the tests below are not
+-- evaluated using the planner's constant folding.
+begin;
+create function return_int_input(int) returns int as $$
+begin
+ return $1;
+end;
+$$ language plpgsql stable;
+create function return_text_input(text) returns text as $$
+begin
+ return $1;
+end;
+$$ language plpgsql stable;
+select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+ ?column?
+----------
+ t
+(1 row)
+
+select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+ ?column?
+----------
+
+(1 row)
+
+select return_int_input(1) in (null, null, null, null, null, null, null, null, null, null, null);
+ ?column?
+----------
+
+(1 row)
+
+select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
+ ?column?
+----------
+ t
+(1 row)
+
+select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+ ?column?
+----------
+
+(1 row)
+
+select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+ ?column?
+----------
+
+(1 row)
+
+select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
+ ?column?
+----------
+ t
+(1 row)
+
+rollback;
+-- Test with non-strict equality function.
+-- We need to create our own type for this.
+begin;
+create type myint;
+create function myintin(cstring) returns myint strict immutable language
+ internal as 'int4in';
+NOTICE: return type myint is only a shell
+create function myintout(myint) returns cstring strict immutable language
+ internal as 'int4out';
+NOTICE: argument type myint is only a shell
+create function myinthash(myint) returns integer strict immutable language
+ internal as 'hashint4';
+NOTICE: argument type myint is only a shell
+create type myint (input = myintin, output = myintout, like = int4);
+create cast (int4 as myint) without function;
+create cast (myint as int4) without function;
+create function myinteq(myint, myint) returns bool as $$
+begin
+ if $1 is null and $2 is null then
+ return true;
+ else
+ return $1::int = $2::int;
+ end if;
+end;
+$$ language plpgsql immutable;
+create operator = (
+ leftarg = myint,
+ rightarg = myint,
+ commutator = =,
+ negator = <>,
+ procedure = myinteq,
+ restrict = eqsel,
+ join = eqjoinsel,
+ merges
+);
+create operator class myint_ops
+default for type myint using hash as
+ operator 1 = (myint, myint),
+ function 1 myinthash(myint);
+create table inttest (a myint);
+insert into inttest values(1::myint),(null);
+-- try an array with enough elements to cause hashing
+select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
+ a
+---
+ 1
+
+(2 rows)
+
+-- ensure the result matched with the non-hashed version. We simply remove
+-- some array elements so that we don't reach the hashing threshold.
+select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null);
+ a
+---
+ 1
+
+(2 rows)
+
+rollback;
diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql
index 1ca8bb151c8..b3fd1b5ecba 100644
--- a/src/test/regress/sql/expressions.sql
+++ b/src/test/regress/sql/expressions.sql
@@ -65,3 +65,88 @@ select count(*) from date_tbl
where f1 not between symmetric '1997-01-01' and '1998-01-01';
select count(*) from date_tbl
where f1 not between symmetric '1997-01-01' and '1998-01-01';
+
+--
+-- Tests for ScalarArrayOpExpr with a hashfn
+--
+
+-- create a stable function so that the tests below are not
+-- evaluated using the planner's constant folding.
+begin;
+
+create function return_int_input(int) returns int as $$
+begin
+ return $1;
+end;
+$$ language plpgsql stable;
+
+create function return_text_input(text) returns text as $$
+begin
+ return $1;
+end;
+$$ language plpgsql stable;
+
+select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+select return_int_input(1) in (null, null, null, null, null, null, null, null, null, null, null);
+select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null);
+select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1);
+select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null);
+select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j');
+
+rollback;
+
+-- Test with non-strict equality function.
+-- We need to create our own type for this.
+
+begin;
+
+create type myint;
+create function myintin(cstring) returns myint strict immutable language
+ internal as 'int4in';
+create function myintout(myint) returns cstring strict immutable language
+ internal as 'int4out';
+create function myinthash(myint) returns integer strict immutable language
+ internal as 'hashint4';
+
+create type myint (input = myintin, output = myintout, like = int4);
+
+create cast (int4 as myint) without function;
+create cast (myint as int4) without function;
+
+create function myinteq(myint, myint) returns bool as $$
+begin
+ if $1 is null and $2 is null then
+ return true;
+ else
+ return $1::int = $2::int;
+ end if;
+end;
+$$ language plpgsql immutable;
+
+create operator = (
+ leftarg = myint,
+ rightarg = myint,
+ commutator = =,
+ negator = <>,
+ procedure = myinteq,
+ restrict = eqsel,
+ join = eqjoinsel,
+ merges
+);
+
+create operator class myint_ops
+default for type myint using hash as
+ operator 1 = (myint, myint),
+ function 1 myinthash(myint);
+
+create table inttest (a myint);
+insert into inttest values(1::myint),(null);
+
+-- try an array with enough elements to cause hashing
+select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null);
+-- ensure the result matched with the non-hashed version. We simply remove
+-- some array elements so that we don't reach the hashing threshold.
+select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null);
+
+rollback;