Allow subscripting of hstore values.
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 11 Dec 2020 23:58:07 +0000 (18:58 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 11 Dec 2020 23:58:21 +0000 (18:58 -0500)
This is basically a finger exercise to prove that it's possible for
an extension module to add subscripting ability.  Subscripted fetch
from an hstore is not different from the existing "hstore -> text"
operator.  Subscripted update does seem to be a little easier to
use than the traditional update method using hstore concatenation,
but it's not a fundamentally new ability.

However, there may be some value in the code as sample code, since
it shows what's basically the minimum-complexity way to implement
subscripting when one needn't consider nested container objects.

Discussion: https://postgr.es/m/3724341.1607551174@sss.pgh.pa.us

contrib/hstore/Makefile
contrib/hstore/expected/hstore.out
contrib/hstore/hstore--1.7--1.8.sql [new file with mode: 0644]
contrib/hstore/hstore.control
contrib/hstore/hstore_subs.c [new file with mode: 0644]
contrib/hstore/sql/hstore.sql
doc/src/sgml/hstore.sgml
doc/src/sgml/ref/create_type.sgml

index 72376d90076335ce1f029420436ba12df714b60e..c4e339b57c1c5d793228ff7445d5ded4ad3de420 100644 (file)
@@ -7,10 +7,12 @@ OBJS = \
        hstore_gin.o \
        hstore_gist.o \
        hstore_io.o \
-       hstore_op.o
+       hstore_op.o \
+       hstore_subs.o
 
 EXTENSION = hstore
 DATA = hstore--1.4.sql \
+       hstore--1.7--1.8.sql \
        hstore--1.6--1.7.sql \
        hstore--1.5--1.6.sql \
        hstore--1.4--1.5.sql \
index 890107943800087a7e7eef498a1e23621416b469..fdcc3920cecd85f6d2784f225969bf2abecb6061 100644 (file)
@@ -1560,6 +1560,29 @@ select json_agg(q) from (select f1, hstore_to_json_loose(f2) as f2 from test_jso
   {"f1":"rec2","f2":{"b": false, "c": "null", "d": -12345, "e": "012345.6", "f": -1.234, "g": 0.345e-4, "a key": 2}}]
 (1 row)
 
+-- Test subscripting
+insert into test_json_agg default values;
+select f2['d'], f2['x'] is null as x_isnull from test_json_agg;
+   f2   | x_isnull 
+--------+----------
+ 12345  | t
+ -12345 | t
+        | t
+(3 rows)
+
+select f2['d']['e'] from test_json_agg;  -- error
+ERROR:  hstore allows only one subscript
+select f2['d':'e'] from test_json_agg;  -- error
+ERROR:  hstore allows only one subscript
+update test_json_agg set f2['d'] = f2['e'], f2['x'] = 'xyzzy';
+select f2 from test_json_agg;
+                                                         f2                                                          
+---------------------------------------------------------------------------------------------------------------------
+ "b"=>"t", "c"=>NULL, "d"=>"012345", "e"=>"012345", "f"=>"1.234", "g"=>"2.345e+4", "x"=>"xyzzy", "a key"=>"1"
+ "b"=>"f", "c"=>"null", "d"=>"012345.6", "e"=>"012345.6", "f"=>"-1.234", "g"=>"0.345e-4", "x"=>"xyzzy", "a key"=>"2"
+ "d"=>NULL, "x"=>"xyzzy"
+(3 rows)
+
 -- Check the hstore_hash() and hstore_hash_extended() function explicitly.
 SELECT v as value, hstore_hash(v)::bit(32) as standard,
        hstore_hash_extended(v, 0)::bit(32) as extended0,
diff --git a/contrib/hstore/hstore--1.7--1.8.sql b/contrib/hstore/hstore--1.7--1.8.sql
new file mode 100644 (file)
index 0000000..d80a138
--- /dev/null
@@ -0,0 +1,13 @@
+/* contrib/hstore/hstore--1.7--1.8.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION hstore UPDATE TO '1.8'" to load this file. \quit
+
+CREATE FUNCTION hstore_subscript_handler(internal)  -- C function that returns hstore's subscripting method struct
+RETURNS internal
+AS 'MODULE_PATHNAME', 'hstore_subscript_handler'
+LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
+
+ALTER TYPE hstore SET (  -- register the handler so hstore values can be subscripted
+  SUBSCRIPT = hstore_subscript_handler
+);
index f0da7724295c7dba9c919b69788d13b1aab53f21..89e3c746c461d189551069cfc4554b73810991aa 100644 (file)
@@ -1,6 +1,6 @@
 # hstore extension
 comment = 'data type for storing sets of (key, value) pairs'
-default_version = '1.7'
+default_version = '1.8'
 module_pathname = '$libdir/hstore'
 relocatable = true
 trusted = true
diff --git a/contrib/hstore/hstore_subs.c b/contrib/hstore/hstore_subs.c
new file mode 100644 (file)
index 0000000..e52de04
--- /dev/null
@@ -0,0 +1,297 @@
+/*-------------------------------------------------------------------------
+ *
+ * hstore_subs.c
+ *       Subscripting support functions for hstore.
+ *
+ * This is a great deal simpler than arraysubs.c, because the result of
+ * subscripting an hstore is just a text string (the value for the key).
+ * We do not need to support array slicing notation, nor multiple subscripts.
+ * Less obviously, because the subscript result is never a SQL container
+ * type, there will never be any nested-assignment scenarios, so we do not
+ * need a fetch_old function.  In turn, that means we can drop the
+ * check_subscripts function and just let the fetch and assign functions
+ * do everything.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       contrib/hstore/hstore_subs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "executor/execExpr.h"
+#include "hstore.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/subscripting.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_expr.h"
+#include "utils/builtins.h"
+
+
+/*
+ * Finish parse analysis of a SubscriptingRef expression for hstore.
+ *
+ * Verify there's just one non-slice subscript, coerce it to text (raising
+ * ERROR if either fails), and set the SubscriptingRef's result type to text.
+ */
+static void
+hstore_subscript_transform(SubscriptingRef *sbsref,
+                                                  List *indirection,
+                                                  ParseState *pstate,
+                                                  bool isSlice,
+                                                  bool isAssignment)
+{
+       A_Indices  *ai;
+       Node       *subexpr;
+
+       /* We support only single-subscript, non-slice cases */
+       if (isSlice || list_length(indirection) != 1)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("hstore allows only one subscript"),
+                                parser_errposition(pstate,
+                                                                       exprLocation((Node *) indirection))));
+
+       /* Transform the subscript expression to type text */
+       ai = linitial_node(A_Indices, indirection);
+       Assert(ai->uidx != NULL && ai->lidx == NULL && !ai->is_slice);
+
+       subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind);
+       /* If it's not text already, try to coerce (assignment-level casts OK) */
+       subexpr = coerce_to_target_type(pstate,
+                                                                       subexpr, exprType(subexpr),
+                                                                       TEXTOID, -1,
+                                                                       COERCION_ASSIGNMENT,
+                                                                       COERCE_IMPLICIT_CAST,
+                                                                       -1);
+       if (subexpr == NULL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_DATATYPE_MISMATCH),
+                                errmsg("hstore subscript must have type text"),
+                                parser_errposition(pstate, exprLocation(ai->uidx))));
+
+       /* ... and store the transformed subscript into the SubscriptingRef node */
+       sbsref->refupperindexpr = list_make1(subexpr);
+       sbsref->reflowerindexpr = NIL;
+
+       /* Determine the result type of the subscripting operation; always text */
+       sbsref->refrestype = TEXTOID;
+       sbsref->reftypmod = -1;
+}
+
+/*
+ * Evaluate SubscriptingRef fetch for hstore; result goes to op->resvalue.
+ *
+ * Source container is in step's result variable (it's known not NULL, since
+ * we set fetch_strict to true), and the subscript value is in upperindex[0].
+ * A NULL subscript, a missing key, or a NULL stored value all yield NULL.
+ */
+static void
+hstore_subscript_fetch(ExprState *state,
+                                          ExprEvalStep *op,
+                                          ExprContext *econtext)
+{
+       SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+       HStore     *hs;
+       text       *key;
+       HEntry     *entries;
+       int                     idx;
+       text       *out;
+
+       /* Should not get here if source hstore is null */
+       Assert(!(*op->resnull));
+
+       /* A null subscript gives a null result rather than an error */
+       if (sbsrefstate->upperindexnull[0])
+       {
+               *op->resnull = true;
+               return;
+       }
+
+       /* OK, fetch/detoast the hstore and subscript */
+       hs = DatumGetHStoreP(*op->resvalue);
+       key = DatumGetTextPP(sbsrefstate->upperindex[0]);
+
+       /* The rest is basically the same as hstore_fetchval() */
+       entries = ARRPTR(hs);
+       idx = hstoreFindKey(hs, NULL,
+                                               VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+       if (idx < 0 || HSTORE_VALISNULL(entries, idx))   /* no such key, or NULL value */
+       {
+               *op->resnull = true;
+               return;
+       }
+
+       out = cstring_to_text_with_len(HSTORE_VAL(entries, STRPTR(hs), idx),
+                                                                  HSTORE_VALLEN(entries, idx));
+
+       *op->resvalue = PointerGetDatum(out);   /* resnull is already false here */
+}
+
+/*
+ * Evaluate SubscriptingRef assignment for hstore (ERROR on NULL subscript).
+ *
+ * Input container (possibly null) is in result area, replacement value is in
+ * SubscriptingRefState's replacevalue/replacenull; output replaces the input.
+ */
+static void
+hstore_subscript_assign(ExprState *state,
+                                               ExprEvalStep *op,
+                                               ExprContext *econtext)
+{
+       SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+       text       *key;
+       Pairs           p;
+       HStore     *out;
+
+       /* A null subscript cannot name a key, so reject it */
+       if (sbsrefstate->upperindexnull[0])
+               ereport(ERROR,
+                               (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+                                errmsg("hstore subscript in assignment must not be null")));
+
+       /* OK, fetch/detoast the subscript */
+       key = DatumGetTextPP(sbsrefstate->upperindex[0]);
+
+       /* Create a Pairs entry for subscript + replacement value */
+       p.needfree = false;
+       p.key = VARDATA_ANY(key);
+       p.keylen = hstoreCheckKeyLen(VARSIZE_ANY_EXHDR(key));
+
+       if (sbsrefstate->replacenull)
+       {
+               p.vallen = 0;
+               p.isnull = true;
+       }
+       else
+       {
+               text       *val = DatumGetTextPP(sbsrefstate->replacevalue);
+
+               p.val = VARDATA_ANY(val);
+               p.vallen = hstoreCheckValLen(VARSIZE_ANY_EXHDR(val));
+               p.isnull = false;
+       }
+
+       if (*op->resnull)
+       {
+               /* Just build a one-element hstore (cf. hstore_from_text) */
+               out = hstorePairs(&p, 1, p.keylen + p.vallen);
+       }
+       else
+       {
+               /*
+                * Otherwise, merge the new key into the hstore.  Based on
+                * hstore_concat.
+                */
+               HStore     *hs = DatumGetHStoreP(*op->resvalue);
+               int                     s1count = HS_COUNT(hs);
+               int                     outcount = 0;
+               int                     vsize;
+               char       *ps1,
+                                  *bufd,
+                                  *pd;
+               HEntry     *es1,
+                                  *ed;
+               int                     s1idx;
+               int                     s2idx;
+
+               /* Allocate for the case where the key is new; may overestimate */
+               vsize = CALCDATASIZE(s1count + 1, VARSIZE(hs) + p.keylen + p.vallen);
+               out = palloc(vsize);
+               SET_VARSIZE(out, vsize);
+               HS_SETCOUNT(out, s1count + 1);
+
+               ps1 = STRPTR(hs);
+               bufd = pd = STRPTR(out);
+               es1 = ARRPTR(hs);
+               ed = ARRPTR(out);
+
+               for (s1idx = s2idx = 0; s1idx < s1count || s2idx < 1; ++outcount)
+               {
+                       int                     difference;
+
+                       if (s1idx >= s1count)
+                               difference = 1;         /* old entries exhausted: emit new pair */
+                       else if (s2idx >= 1)
+                               difference = -1;        /* new pair already emitted: copy the rest */
+                       else
+                       {
+                               int                     s1keylen = HSTORE_KEYLEN(es1, s1idx);
+                               int                     s2keylen = p.keylen;
+
+                               if (s1keylen == s2keylen)
+                                       difference = memcmp(HSTORE_KEY(es1, ps1, s1idx),
+                                                                               p.key,
+                                                                               s1keylen);
+                               else
+                                       difference = (s1keylen > s2keylen) ? 1 : -1;    /* shorter key goes first */
+                       }
+
+                       if (difference >= 0)
+                       {
+                               HS_ADDITEM(ed, bufd, pd, p);
+                               ++s2idx;
+                               if (difference == 0)    /* same key: skip the old entry, replacing it */
+                                       ++s1idx;
+                       }
+                       else
+                       {
+                               HS_COPYITEM(ed, bufd, pd,
+                                                       HSTORE_KEY(es1, ps1, s1idx),
+                                                       HSTORE_KEYLEN(es1, s1idx),
+                                                       HSTORE_VALLEN(es1, s1idx),
+                                                       HSTORE_VALISNULL(es1, s1idx));
+                               ++s1idx;
+                       }
+               }
+
+               HS_FINALIZE(out, outcount, bufd, pd);
+       }
+
+       *op->resvalue = PointerGetDatum(out);
+       *op->resnull = false;
+}
+
+/*
+ * Set up execution state for an hstore subscript operation: fill *methods.
+ */
+static void
+hstore_exec_setup(const SubscriptingRef *sbsref,
+                                 SubscriptingRefState *sbsrefstate,
+                                 SubscriptExecSteps *methods)
+{
+       /* Assert we are dealing with one subscript */
+       Assert(sbsrefstate->numlower == 0);
+       Assert(sbsrefstate->numupper == 1);
+       /* We can't check upperprovided[0] here, but it must be true */
+
+       /* Pass back pointers to appropriate step execution functions */
+       methods->sbs_check_subscripts = NULL;   /* fetch/assign check the subscript themselves */
+       methods->sbs_fetch = hstore_subscript_fetch;
+       methods->sbs_assign = hstore_subscript_assign;
+       methods->sbs_fetch_old = NULL;  /* not needed: no nested assignment for hstore */
+}
+
+/*
+ * hstore_subscript_handler
+ *             Subscripting handler for hstore: returns the method struct below.
+ */
+PG_FUNCTION_INFO_V1(hstore_subscript_handler);
+Datum
+hstore_subscript_handler(PG_FUNCTION_ARGS)
+{
+       static const SubscriptRoutines sbsroutines = {
+               .transform = hstore_subscript_transform,        /* parse-analysis callback */
+               .exec_setup = hstore_exec_setup,        /* selects the execution step functions */
+               .fetch_strict = true,   /* fetch returns NULL for NULL inputs */
+               .fetch_leakproof = true,        /* fetch returns NULL for bad subscript */
+               .store_leakproof = false        /* ... but assignment can throw (e.g. NULL subscript) */
+       };
+
+       PG_RETURN_POINTER(&sbsroutines);        /* static storage, so it outlives this call */
+}
index a6c2f3a0ce017d138cb6a37ba70160d961b6cbb3..8d96e30403042727ba31ee0fe2774ec06d7b5ecd 100644 (file)
@@ -364,6 +364,14 @@ insert into test_json_agg values ('rec1','"a key" =>1, b => t, c => null, d=> 12
 select json_agg(q) from test_json_agg q;
 select json_agg(q) from (select f1, hstore_to_json_loose(f2) as f2 from test_json_agg) q;
 
+-- Test subscripting
+insert into test_json_agg default values;
+select f2['d'], f2['x'] is null as x_isnull from test_json_agg;
+select f2['d']['e'] from test_json_agg;  -- error
+select f2['d':'e'] from test_json_agg;  -- error
+update test_json_agg set f2['d'] = f2['e'], f2['x'] = 'xyzzy';
+select f2 from test_json_agg;
+
 -- Check the hstore_hash() and hstore_hash_extended() function explicitly.
 SELECT v as value, hstore_hash(v)::bit(32) as standard,
        hstore_hash_extended(v, 0)::bit(32) as extended0,
index 14a36ade00a0b7de3368cd9c8317843f6d4886fc..080706280e80954b6041292eefee742e54e65223 100644 (file)
@@ -713,6 +713,39 @@ b
     </tbody>
    </tgroup>
   </table>
+
+  <para>
+   In addition to these operators and functions, values of
+   the <type>hstore</type> type can be subscripted, allowing them to act
+   like associative arrays.  Only a single subscript of type <type>text</type>
+   can be specified; it is interpreted as a key and the corresponding
+   value is fetched or stored.  For example,
+
+<programlisting>
+CREATE TABLE mytable (h hstore);
+INSERT INTO mytable VALUES ('a=>b, c=>d');
+SELECT h['a'] FROM mytable;
+ h
+---
+ b
+(1 row)
+
+UPDATE mytable SET h['c'] = 'new';
+SELECT h FROM mytable;
+          h
+----------------------
+ "a"=>"b", "c"=>"new"
+(1 row)
+</programlisting>
+
+   A subscripted fetch returns <literal>NULL</literal> if the subscript
+   is <literal>NULL</literal> or that key does not exist in
+   the <type>hstore</type>.  (Thus, a subscripted fetch is not greatly
+   different from the <literal>-&gt;</literal> operator.)
+   A subscripted update fails if the subscript is <literal>NULL</literal>;
+   otherwise, it replaces the value for that key, adding an entry to
+   the <type>hstore</type> if the key does not already exist.
+  </para>
  </sect2>
 
  <sect2>
@@ -767,7 +800,16 @@ CREATE INDEX hidx ON testhstore USING HASH (h);
   <para>
    Add a key, or update an existing key with a new value:
 <programlisting>
+UPDATE tab SET h['c'] = '3';
+</programlisting>
+   Another way to do the same thing is:
+<programlisting>
 UPDATE tab SET h = h || hstore('c', '3');
+</programlisting>
+   If multiple keys are to be added or changed in one operation,
+   the concatenation approach is more efficient than subscripting:
+<programlisting>
+UPDATE tab SET h = h || hstore(array['q', 'w'], array['11', '12']);
 </programlisting>
   </para>
 
index d909ee0d33b266cd00aef2b74b50ce220d001bb0..d575f166142451273d50dba85e7cf29915fc3e02 100644 (file)
@@ -333,9 +333,11 @@ CREATE TYPE <replaceable class="parameter">name</replaceable>
    return an <type>internal</type> result, which is a pointer to a struct
    of methods (functions) that implement subscripting.
    The detailed API for subscript functions appears
-   in <filename>src/include/nodes/subscripting.h</filename>;
-   it may also be useful to read the array implementation
-   in <filename>src/backend/utils/adt/arraysubs.c</filename>.
+   in <filename>src/include/nodes/subscripting.h</filename>.
+   It may also be useful to read the array implementation
+   in <filename>src/backend/utils/adt/arraysubs.c</filename>,
+   or the simpler code
+   in <filename>contrib/hstore/hstore_subs.c</filename>.
    Additional information appears in
    <xref linkend="sql-createtype-array"/> below.
   </para>