Hash support for row types

Add hash functions for the record type as well as a hash operator family and operator class for the record type. This enables all the hash functionality for the record type such as hash-based plans for UNION/INTERSECT/EXCEPT DISTINCT, recursive queries using UNION DISTINCT, hash joins, and hash partitioning. Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://www.postgresql.org/message-id/flat/38eccd35-4e2d-6767-1b3c-dada1eac3124%402ndquadrant.com
author: Peter Eisentraut 2020-11-19 08:24:37 +0000
committer: Peter Eisentraut 2020-11-19 08:32:47 +0000
commit: 01e658fa74cb7e3292448f6663b549135958003b (patch)
tree: d87a206912e085d7bcdadc424b571055055e58e2 /src/test
parent: 7888b0999488511e4266f2134053fa3a6505a155 (diff)
8 files changed, 130 insertions, 40 deletions
diff --git a/src/test/regress/expected/hash_func.out b/src/test/regress/expected/hash_func.out
index e7d615fde59..daeb3e118dd 100644
--- a/src/test/regress/expected/hash_func.out
+++ b/src/test/regress/expected/hash_func.out
@@ -305,3 +305,24 @@ WHERE  hash_range(v)::bit(32) != hash_range_extended(v, 0)::bit(32)
 -------+----------+-----------+-----------
 (0 rows)
 
+CREATE TYPE t1 AS (a int, b text);
+SELECT v as value, hash_record(v)::bit(32) as standard,
+       hash_record_extended(v, 0)::bit(32) as extended0,
+       hash_record_extended(v, 1)::bit(32) as extended1
+FROM   (VALUES (row(1, 'aaa')::t1, row(2, 'bbb'), row(-1, 'ccc'))) x(v)
+WHERE  hash_record(v)::bit(32) != hash_record_extended(v, 0)::bit(32)
+       OR hash_record(v)::bit(32) = hash_record_extended(v, 1)::bit(32);
+ value | standard | extended0 | extended1 
+-------+----------+-----------+-----------
+(0 rows)
+
+DROP TYPE t1;
+-- record hashing with non-hashable field type
+CREATE TYPE t2 AS (a money, b text);
+SELECT v as value, hash_record(v)::bit(32) as standard
+FROM   (VALUES (row(1, 'aaa')::t2)) x(v);
+ERROR:  could not identify a hash function for type money
+SELECT v as value, hash_record_extended(v, 0)::bit(32) as extended0
+FROM   (VALUES (row(1, 'aaa')::t2)) x(v);
+ERROR:  could not identify an extended hash function for type money
+DROP TYPE t2;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 6c9a5e26dde..60b621b651f 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2707,6 +2707,7 @@ select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
 (5 rows)
 
 set enable_mergejoin = 0;
+set enable_hashjoin = 0;
 explain (costs off)
 select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
                      QUERY PLAN                     
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 22e1ff5c42d..75f78db8f58 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -646,40 +646,36 @@ select x from (values (array[1, 2]), (array[1, 3])) _(x) except select x from (v
 reset enable_hashagg;
 -- records
 set enable_hashagg to on;
--- currently no hashing support for record, so these will still run with sort plans:
 explain (costs off)
 select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                  QUERY PLAN                   
------------------------------------------------
- Unique
-   ->  Sort
-         Sort Key: "*VALUES*".column1
-         ->  Append
-               ->  Values Scan on "*VALUES*"
-               ->  Values Scan on "*VALUES*_1"
-(6 rows)
+               QUERY PLAN                
+-----------------------------------------
+ HashAggregate
+   Group Key: "*VALUES*".column1
+   ->  Append
+         ->  Values Scan on "*VALUES*"
+         ->  Values Scan on "*VALUES*_1"
+(5 rows)
 
 select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
    x   
 -------
- (1,2)
- (1,3)
  (1,4)
+ (1,3)
+ (1,2)
 (3 rows)
 
 explain (costs off)
 select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                     QUERY PLAN                      
------------------------------------------------------
- SetOp Intersect
-   ->  Sort
-         Sort Key: "*SELECT* 1".x
-         ->  Append
-               ->  Subquery Scan on "*SELECT* 1"
-                     ->  Values Scan on "*VALUES*"
-               ->  Subquery Scan on "*SELECT* 2"
-                     ->  Values Scan on "*VALUES*_1"
-(8 rows)
+                  QUERY PLAN                   
+-----------------------------------------------
+ HashSetOp Intersect
+   ->  Append
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Values Scan on "*VALUES*"
+         ->  Subquery Scan on "*SELECT* 2"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
 
 select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
    x   
@@ -689,17 +685,15 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va
 
 explain (costs off)
 select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                     QUERY PLAN                      
------------------------------------------------------
- SetOp Except
-   ->  Sort
-         Sort Key: "*SELECT* 1".x
-         ->  Append
-               ->  Subquery Scan on "*SELECT* 1"
-                     ->  Values Scan on "*VALUES*"
-               ->  Subquery Scan on "*SELECT* 2"
-                     ->  Values Scan on "*VALUES*_1"
-(8 rows)
+                  QUERY PLAN                   
+-----------------------------------------------
+ HashSetOp Except
+   ->  Append
+         ->  Subquery Scan on "*SELECT* 1"
+               ->  Values Scan on "*VALUES*"
+         ->  Subquery Scan on "*SELECT* 2"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
 
 select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
    x   
@@ -708,8 +702,26 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
 (1 row)
 
 -- non-hashable type
+-- With an anonymous row type, the typcache reports that the type is
+-- hashable, but then it will fail at run time.
 explain (costs off)
 select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
+               QUERY PLAN                
+-----------------------------------------
+ HashAggregate
+   Group Key: "*VALUES*".column1
+   ->  Append
+         ->  Values Scan on "*VALUES*"
+         ->  Values Scan on "*VALUES*_1"
+(5 rows)
+
+select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
+ERROR:  could not identify a hash function for type money
+-- With a defined row type, the typcache can inspect the type's fields
+-- for hashability.
+create type ct1 as (f1 money);
+explain (costs off)
+select x from (values (row(100::money)::ct1), (row(200::money)::ct1)) _(x) union select x from (values (row(100::money)::ct1), (row(300::money)::ct1)) _(x);
                   QUERY PLAN                   
 -----------------------------------------------
  Unique
@@ -720,7 +732,7 @@ select x from (values (row(100::money)), (row(200::money))) _(x) union select x
                ->  Values Scan on "*VALUES*_1"
 (6 rows)
 
-select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
+select x from (values (row(100::money)::ct1), (row(200::money)::ct1)) _(x) union select x from (values (row(100::money)::ct1), (row(300::money)::ct1)) _(x);
      x     
 -----------
  ($100.00)
@@ -728,6 +740,7 @@ select x from (values (row(100::money)), (row(200::money))) _(x) union select x
  ($300.00)
 (3 rows)
 
+drop type ct1;
 set enable_hashagg to off;
 explain (costs off)
 select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 1f984a9fa48..96835a517ee 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -625,7 +625,7 @@ select * from search_graph;
  2 | 3 | arc 2 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
 (25 rows)
 
--- UNION DISTINCT currently not supported here because row types not hashable
+-- UNION DISTINCT exercises row type hashing support
 with recursive search_graph(f, t, label, is_cycle, path) as (
 	select *, false, array[row(g.f, g.t)] from graph g
 	union distinct
@@ -634,8 +634,35 @@ with recursive search_graph(f, t, label, is_cycle, path) as (
 	where g.f = sg.t and not is_cycle
 )
 select * from search_graph;
-ERROR:  could not implement recursive UNION
-DETAIL:  All column datatypes must be hashable.
+ f | t |   label    | is_cycle |                   path                    
+---+---+------------+----------+-------------------------------------------
+ 1 | 2 | arc 1 -> 2 | f        | {"(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(1,3)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(2,3)"}
+ 1 | 4 | arc 1 -> 4 | f        | {"(1,4)"}
+ 4 | 5 | arc 4 -> 5 | f        | {"(4,5)"}
+ 5 | 1 | arc 5 -> 1 | f        | {"(5,1)"}
+ 1 | 2 | arc 1 -> 2 | f        | {"(5,1)","(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | f        | {"(5,1)","(1,4)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(1,2)","(2,3)"}
+ 4 | 5 | arc 4 -> 5 | f        | {"(1,4)","(4,5)"}
+ 5 | 1 | arc 5 -> 1 | f        | {"(4,5)","(5,1)"}
+ 1 | 2 | arc 1 -> 2 | f        | {"(4,5)","(5,1)","(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | f        | {"(4,5)","(5,1)","(1,4)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(5,1)","(1,2)","(2,3)"}
+ 4 | 5 | arc 4 -> 5 | f        | {"(5,1)","(1,4)","(4,5)"}
+ 5 | 1 | arc 5 -> 1 | f        | {"(1,4)","(4,5)","(5,1)"}
+ 1 | 2 | arc 1 -> 2 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)"}
+ 1 | 3 | arc 1 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | t        | {"(1,4)","(4,5)","(5,1)","(1,4)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(4,5)","(5,1)","(1,2)","(2,3)"}
+ 4 | 5 | arc 4 -> 5 | t        | {"(4,5)","(5,1)","(1,4)","(4,5)"}
+ 5 | 1 | arc 5 -> 1 | t        | {"(5,1)","(1,4)","(4,5)","(5,1)"}
+ 2 | 3 | arc 2 -> 3 | f        | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
+(25 rows)
+
 -- ordering by the path column has same effect as SEARCH DEPTH FIRST
 with recursive search_graph(f, t, label, is_cycle, path) as (
 	select *, false, array[row(g.f, g.t)] from graph g
diff --git a/src/test/regress/sql/hash_func.sql b/src/test/regress/sql/hash_func.sql
index de84e68ba31..280b0595834 100644
--- a/src/test/regress/sql/hash_func.sql
+++ b/src/test/regress/sql/hash_func.sql
@@ -226,3 +226,20 @@ FROM   (VALUES (int4range(10, 20)), (int4range(23, 43)),
         (int4range(550274, 1550274)), (int4range(1550275, 208112489))) x(v)
 WHERE  hash_range(v)::bit(32) != hash_range_extended(v, 0)::bit(32)
        OR hash_range(v)::bit(32) = hash_range_extended(v, 1)::bit(32);
+
+CREATE TYPE t1 AS (a int, b text);
+SELECT v as value, hash_record(v)::bit(32) as standard,
+       hash_record_extended(v, 0)::bit(32) as extended0,
+       hash_record_extended(v, 1)::bit(32) as extended1
+FROM   (VALUES (row(1, 'aaa')::t1, row(2, 'bbb'), row(-1, 'ccc'))) x(v)
+WHERE  hash_record(v)::bit(32) != hash_record_extended(v, 0)::bit(32)
+       OR hash_record(v)::bit(32) = hash_record_extended(v, 1)::bit(32);
+DROP TYPE t1;
+
+-- record hashing with non-hashable field type
+CREATE TYPE t2 AS (a money, b text);
+SELECT v as value, hash_record(v)::bit(32) as standard
+FROM   (VALUES (row(1, 'aaa')::t2)) x(v);
+SELECT v as value, hash_record_extended(v, 0)::bit(32) as extended0
+FROM   (VALUES (row(1, 'aaa')::t2)) x(v);
+DROP TYPE t2;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index dd60d6a1f3b..d687216618c 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -700,6 +700,7 @@ explain (costs off)
 select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
 
 set enable_mergejoin = 0;
+set enable_hashjoin = 0;
 
 explain (costs off)
 select a.idv, b.idv from tidv a, tidv b where a.idv = b.idv;
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index 6cee454a4cf..ce22f34c719 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -206,7 +206,6 @@ reset enable_hashagg;
 -- records
 set enable_hashagg to on;
 
--- currently no hashing support for record, so these will still run with sort plans:
 explain (costs off)
 select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
 select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
@@ -218,10 +217,21 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
 select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
 
 -- non-hashable type
+
+-- With an anonymous row type, the typcache reports that the type is
+-- hashable, but then it will fail at run time.
 explain (costs off)
 select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
 select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
 
+-- With a defined row type, the typcache can inspect the type's fields
+-- for hashability.
+create type ct1 as (f1 money);
+explain (costs off)
+select x from (values (row(100::money)::ct1), (row(200::money)::ct1)) _(x) union select x from (values (row(100::money)::ct1), (row(300::money)::ct1)) _(x);
+select x from (values (row(100::money)::ct1), (row(200::money)::ct1)) _(x) union select x from (values (row(100::money)::ct1), (row(300::money)::ct1)) _(x);
+drop type ct1;
+
 set enable_hashagg to off;
 
 explain (costs off)
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index c6ce01a2d11..b1b79eb1722 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -325,7 +325,7 @@ with recursive search_graph(f, t, label, is_cycle, path) as (
 )
 select * from search_graph;
 
--- UNION DISTINCT currently not supported here because row types not hashable
+-- UNION DISTINCT exercises row type hashing support
 with recursive search_graph(f, t, label, is_cycle, path) as (
 	select *, false, array[row(g.f, g.t)] from graph g
 	union distinct
author	Peter Eisentraut	2020-11-19 08:24:37 +0000
committer	Peter Eisentraut	2020-11-19 08:32:47 +0000
commit	01e658fa74cb7e3292448f6663b549135958003b (patch)
tree	d87a206912e085d7bcdadc424b571055055e58e2 /src/test
parent	7888b0999488511e4266f2134053fa3a6505a155 (diff)