Rename jsonb_hash_ops to jsonb_path_ops.

author Tom Lane <tgl@sss.pgh.pa.us>

Sun, 11 May 2014 16:06:04 +0000 (12:06 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sun, 11 May 2014 16:06:04 +0000 (12:06 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 11 May 2014 16:06:04 +0000 (12:06 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 11 May 2014 16:06:04 +0000 (12:06 -0400)
diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml

index 0b3d6eeb633b2d5ad018f231e7f867e7efbb740b..1cbc73c70cfed835820697fad15350024cacf07c 100644 (file)
--- a/doc/src/sgml/gin.sgml
+++ b/doc/src/sgml/gin.sgml
@@ -395,7 +395,7 @@
        </entry>
       </row>
       <row>
-      <entry><literal>jsonb_hash_ops</></entry>
+      <entry><literal>jsonb_path_ops</></entry>
        <entry><type>jsonb</></entry>
        <entry>
         <literal>@&gt;</>
@@ -415,7 +415,7 @@
  
   <para>
    Of the two operator classes for type <type>jsonb</>, <literal>jsonb_ops</>
-  is the default.  <literal>jsonb_hash_ops</> supports fewer operators but
+  is the default.  <literal>jsonb_path_ops</> supports fewer operators but
    offers better performance for those operators.
    See <xref linkend="json-indexing"> for details.
   </para>
diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml

index 518fe63873e57c2eba741301d58c5d5927d615f8..66426189ca53fbf63786996d6917c22b0174e82d 100644 (file)
--- a/doc/src/sgml/json.sgml
+++ b/doc/src/sgml/json.sgml
@@ -156,7 +156,7 @@
     </table>
  
   <sect2 id="json-keys-elements">
-  <title><type>jsonb</> Input and Output Syntax</title>
+  <title>JSON Input and Output Syntax</title>
    <para>
     The input/output syntax for the JSON data types is as specified in
     <acronym>RFC</> 7159.
@@ -366,11 +366,11 @@ SELECT '"foo"'::jsonb ? 'foo';
    <programlisting>
  CREATE INDEX idxgin ON api USING gin (jdoc);
    </programlisting>
-    The non-default GIN operator class <literal>jsonb_hash_ops</>
+    The non-default GIN operator class <literal>jsonb_path_ops</>
      supports indexing the <literal>@&gt;</> operator only.
      An example of creating an index with this operator class is:
    <programlisting>
-CREATE INDEX idxginh ON api USING gin (jdoc jsonb_hash_ops);
+CREATE INDEX idxginp ON api USING gin (jdoc jsonb_path_ops);
    </programlisting>
    </para>
  
@@ -444,10 +444,10 @@ SELECT jdoc-&gt;'guid', jdoc-&gt;'name' FROM api WHERE jdoc @&gt; '{"tags": ["qu
    </para>
  
    <para>
-    Although the <literal>jsonb_hash_ops</literal> operator class supports
+    Although the <literal>jsonb_path_ops</literal> operator class supports
      only queries with the <literal>@&gt;</> operator, it has notable
      performance advantages over the default operator
-    class <literal>jsonb_ops</literal>.  A <literal>jsonb_hash_ops</literal>
+    class <literal>jsonb_ops</literal>.  A <literal>jsonb_path_ops</literal>
      index is usually much smaller than a <literal>jsonb_ops</literal>
      index over the same data, and the specificity of searches is better,
      particularly when queries contain keys that appear frequently in the
@@ -455,6 +455,41 @@ SELECT jdoc-&gt;'guid', jdoc-&gt;'name' FROM api WHERE jdoc @&gt; '{"tags": ["qu
      than with the default operator class.
    </para>
  
+  <para>
+    The technical difference between a <literal>jsonb_ops</literal>
+    and a <literal>jsonb_path_ops</literal> GIN index is that the former
+    creates independent index items for each key and value in the data,
+    while the latter creates index items only for each value in the
+    data.<footnote><para>For this purpose, the term <quote>value</>
+    includes array elements, though JSON terminology sometimes considers
+    array elements distinct from values within objects.</para></footnote>
+    But in <literal>jsonb_path_ops</literal>, each index item is a hash
+    of both the value and the key(s) leading to it; for example to index
+    <literal>{"foo": {"bar": "baz"}}</literal>, a single index item would
+    be created incorporating all three of <literal>foo</>, <literal>bar</>,
+    and <literal>baz</> into the hash value.  Thus a containment query
+    looking for this structure would result in an extremely specific index
+    search; but there is no way at all to find out whether <literal>foo</>
+    appears as a key.  On the other hand, a <literal>jsonb_ops</literal>
+    index would create three index items representing <literal>foo</>,
+    <literal>bar</>, and <literal>baz</> separately; then to do the
+    containment query, it would look for rows containing all three of
+    these items.  While GIN indexes can perform such an AND search fairly
+    efficiently, it will still be less specific and slower than the
+    equivalent <literal>jsonb_path_ops</literal> search, especially if
+    there are a very large number of rows containing any single one of the
+    three index items.
+  </para>
+
+  <para>
+    A disadvantage of the <literal>jsonb_path_ops</literal> approach is
+    that it produces no index entries for JSON structures not containing
+    any values, such as <literal>{"a": {}}</literal>.  If a search for
+    documents containing such a structure is requested, it will require a
+    full-index scan, which is quite slow.  <literal>jsonb_path_ops</> is
+    therefore ill-suited for applications that often perform such searches.
+  </para>
+
    <para>
      <type>jsonb</> also supports <literal>btree</> and <literal>hash</>
      indexes.  These are usually useful only if it's important to check
diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c

index 57a0b2c8a3cebcb3d1f78e598fe4c0204e28ca3d..069ee03080308c28425021a948f201d40d144ef4 100644 (file)
--- a/src/backend/utils/adt/jsonb_gin.c
+++ b/src/backend/utils/adt/jsonb_gin.c
@@ -315,9 +315,9 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
  
  /*
   *
- * jsonb_hash_ops GIN opclass support functions
+ * jsonb_path_ops GIN opclass support functions
   *
- * In a jsonb_hash_ops index, the GIN keys are uint32 hashes, one per JSON
+ * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON
   * value; but the JSON key(s) leading to each value are also included in its
   * hash computation.  This means we can only support containment queries,
   * but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
@@ -326,7 +326,7 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
   */
  
  Datum
-gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
+gin_extract_jsonb_path(PG_FUNCTION_ARGS)
  {
         Jsonb      *jb = PG_GETARG_JSONB(0);
         int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
@@ -349,7 +349,7 @@ gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
         /* Otherwise, use 2 * root count as initial estimate of result size */
         entries = (Datum *) palloc(sizeof(Datum) * total);
  
-       /* We keep a stack of hashes corresponding to parent key levels */
+       /* We keep a stack of partial hashes corresponding to parent key levels */
         tail.parent = NULL;
         tail.hash = 0;
         stack = &tail;
@@ -439,7 +439,7 @@ gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
  }
  
  Datum
-gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS)
+gin_extract_jsonb_query_path(PG_FUNCTION_ARGS)
  {
         int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
         StrategyNumber strategy = PG_GETARG_UINT16(2);
@@ -449,9 +449,9 @@ gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS)
         if (strategy != JsonbContainsStrategyNumber)
                 elog(ERROR, "unrecognized strategy number: %d", strategy);
  
-       /* Query is a jsonb, so just apply gin_extract_jsonb_hash ... */
+       /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */
         entries = (Datum *)
-               DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_hash,
+               DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path,
                                                                                         PG_GETARG_DATUM(0),
                                                                                         PointerGetDatum(nentries)));
  
@@ -463,7 +463,7 @@ gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS)
  }
  
  Datum
-gin_consistent_jsonb_hash(PG_FUNCTION_ARGS)
+gin_consistent_jsonb_path(PG_FUNCTION_ARGS)
  {
         bool       *check = (bool *) PG_GETARG_POINTER(0);
         StrategyNumber strategy = PG_GETARG_UINT16(1);
@@ -480,13 +480,12 @@ gin_consistent_jsonb_hash(PG_FUNCTION_ARGS)
                 elog(ERROR, "unrecognized strategy number: %d", strategy);
  
         /*
-        * jsonb_hash_ops is necessarily lossy, not only because of hash
+        * jsonb_path_ops is necessarily lossy, not only because of hash
          * collisions but also because it doesn't preserve complete information
          * about the structure of the JSON object.  Besides, there are some
-        * special rules around the containment of raw scalar arrays and regular
-        * arrays that are not handled here.  So we must always recheck a match.
-        * However, if not all of the keys are present, the tuple certainly
-        * doesn't match.
+        * special rules around the containment of raw scalars in arrays that are
+        * not handled here.  So we must always recheck a match.  However, if not
+        * all of the keys are present, the tuple certainly doesn't match.
          */
         *recheck = true;
         for (i = 0; i < nkeys; i++)
@@ -502,7 +501,7 @@ gin_consistent_jsonb_hash(PG_FUNCTION_ARGS)
  }
  
  Datum
-gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS)
+gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS)
  {
         GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
         StrategyNumber strategy = PG_GETARG_UINT16(1);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 76422e55872adb56281f24afc180f7628a079ef6..c46a92b82f73eb8a3194de23079048a805cbbf2f 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201405093
+#define CATALOG_VERSION_NO     201405111
  
  #endif
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h

index 8efd3be3c668a196c60b6a1a18dc69e568267eb4..264059f17e01f9fb75076564762c5f9efc3751bc 100644 (file)
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -787,12 +787,12 @@ DATA(insert (     4033   3802 3802 4 s    3245 403 0 ));
  DATA(insert (  4033   3802 3802 5 s    3243 403 0 ));
  
  /*
- * hash jsonb ops
+ * hash jsonb_ops
   */
  DATA(insert (  4034   3802 3802 1 s 3240 405 0 ));
  
  /*
- * GIN jsonb ops
+ * GIN jsonb_ops
   */
  DATA(insert (  4036   3802 3802 7 s 3246 2742 0 ));
  DATA(insert (  4036   3802 25 9 s 3247 2742 0 ));
@@ -800,7 +800,7 @@ DATA(insert (       4036   3802 1009 10 s 3248 2742 0 ));
  DATA(insert (  4036   3802 1009 11 s 3249 2742 0 ));
  
  /*
- * GIN jsonb hash ops
+ * GIN jsonb_path_ops
   */
  DATA(insert (  4037   3802 3802 7 s 3246 2742 0 ));
  
diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h

index ecf70639c86c477e75b07454a5ae251ae3e4e509..369888665fe9041833b0e4bc9e86b02dc0b5bc09 100644 (file)
--- a/src/include/catalog/pg_opclass.h
+++ b/src/include/catalog/pg_opclass.h
@@ -232,6 +232,6 @@ DATA(insert (       4000    text_ops                        PGNSP PGUID 4017  25 t 0 ));
  DATA(insert (  403             jsonb_ops                       PGNSP PGUID 4033  3802 t 0 ));
  DATA(insert (  405             jsonb_ops                       PGNSP PGUID 4034  3802 t 0 ));
  DATA(insert (  2742    jsonb_ops                       PGNSP PGUID 4036  3802 t 25 ));
-DATA(insert (  2742    jsonb_hash_ops          PGNSP PGUID 4037  3802 f 23 ));
+DATA(insert (  2742    jsonb_path_ops          PGNSP PGUID 4037  3802 f 23 ));
  
  #endif   /* PG_OPCLASS_H */
diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h

index 9e8f4ac5b6310edf3ad3e2652bfa0b3556842481..c83ac8c1a44be14fa7eaece1a8c22a675edfbf14 100644 (file)
--- a/src/include/catalog/pg_opfamily.h
+++ b/src/include/catalog/pg_opfamily.h
@@ -148,11 +148,11 @@ DATA(insert OID = 3474 (  4000    range_ops               PGNSP PGUID ));
  DATA(insert OID = 4015 (       4000    quad_point_ops  PGNSP PGUID ));
  DATA(insert OID = 4016 (       4000    kd_point_ops    PGNSP PGUID ));
  DATA(insert OID = 4017 (       4000    text_ops                PGNSP PGUID ));
+#define TEXT_SPGIST_FAM_OID 4017
  DATA(insert OID = 4033 (       403             jsonb_ops               PGNSP PGUID ));
  DATA(insert OID = 4034 (       405             jsonb_ops               PGNSP PGUID ));
  DATA(insert OID = 4035 (       783             jsonb_ops               PGNSP PGUID ));
  DATA(insert OID = 4036 (       2742    jsonb_ops               PGNSP PGUID ));
-DATA(insert OID = 4037 (       2742    jsonb_hash_ops  PGNSP PGUID ));
-#define TEXT_SPGIST_FAM_OID 4017
+DATA(insert OID = 4037 (       2742    jsonb_path_ops  PGNSP PGUID ));
  
  #endif   /* PG_OPFAMILY_H */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index e601ccd09cc0466ee397d8b9c1dfc206911eaebc..72170af0e840bf5067456552bffb1b3de5484ca5 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4645,13 +4645,13 @@ DATA(insert OID = 3484 (  gin_consistent_jsonb  PGNSP PGUID 12 1 0 0 0 f f f f t
  DESCR("GIN support");
  DATA(insert OID = 3488 (  gin_triconsistent_jsonb      PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 16 "2281 21 2277 23 2281 2281 2281" _null_ _null_ _null_ _null_ gin_triconsistent_jsonb _null_ _null_ _null_ ));
  DESCR("GIN support");
-DATA(insert OID = 3485 (  gin_extract_jsonb_hash  PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ gin_extract_jsonb_hash _null_ _null_ _null_ ));
+DATA(insert OID = 3485 (  gin_extract_jsonb_path  PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ gin_extract_jsonb_path _null_ _null_ _null_ ));
  DESCR("GIN support");
-DATA(insert OID = 3486 (  gin_extract_jsonb_query_hash PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 2281 "2277 2281 21 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gin_extract_jsonb_query_hash _null_ _null_ _null_ ));
+DATA(insert OID = 3486 (  gin_extract_jsonb_query_path PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 2281 "2277 2281 21 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gin_extract_jsonb_query_path _null_ _null_ _null_ ));
  DESCR("GIN support");
-DATA(insert OID = 3487 (  gin_consistent_jsonb_hash  PGNSP PGUID 12 1 0 0 0 f f f f t f i 8 0 16 "2281 21 2277 23 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gin_consistent_jsonb_hash _null_ _null_ _null_ ));
+DATA(insert OID = 3487 (  gin_consistent_jsonb_path  PGNSP PGUID 12 1 0 0 0 f f f f t f i 8 0 16 "2281 21 2277 23 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gin_consistent_jsonb_path _null_ _null_ _null_ ));
  DESCR("GIN support");
-DATA(insert OID = 3489 (  gin_triconsistent_jsonb_hash PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 16 "2281 21 2277 23 2281 2281 2281" _null_ _null_ _null_ _null_ gin_triconsistent_jsonb_hash _null_ _null_ _null_ ));
+DATA(insert OID = 3489 (  gin_triconsistent_jsonb_path PGNSP PGUID 12 1 0 0 0 f f f f t f i 7 0 16 "2281 21 2277 23 2281 2281 2281" _null_ _null_ _null_ _null_ gin_triconsistent_jsonb_path _null_ _null_ _null_ ));
  DESCR("GIN support");
  
  /* txid */
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h

index bb8c380ee3a4a2253a017688e9e75c098302248c..add76280ba1733e79fddac3a35e05bbd21177e18 100644 (file)
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -332,18 +332,18 @@ extern Datum jsonb_eq(PG_FUNCTION_ARGS);
  extern Datum jsonb_cmp(PG_FUNCTION_ARGS);
  extern Datum jsonb_hash(PG_FUNCTION_ARGS);
  
-/* GIN support functions */
+/* GIN support functions for jsonb_ops */
  extern Datum gin_compare_jsonb(PG_FUNCTION_ARGS);
  extern Datum gin_extract_jsonb(PG_FUNCTION_ARGS);
  extern Datum gin_extract_jsonb_query(PG_FUNCTION_ARGS);
  extern Datum gin_consistent_jsonb(PG_FUNCTION_ARGS);
  extern Datum gin_triconsistent_jsonb(PG_FUNCTION_ARGS);
  
-/* GIN hash opclass functions */
-extern Datum gin_extract_jsonb_hash(PG_FUNCTION_ARGS);
-extern Datum gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS);
-extern Datum gin_consistent_jsonb_hash(PG_FUNCTION_ARGS);
-extern Datum gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS);
+/* GIN support functions for jsonb_path_ops */
+extern Datum gin_extract_jsonb_path(PG_FUNCTION_ARGS);
+extern Datum gin_extract_jsonb_query_path(PG_FUNCTION_ARGS);
+extern Datum gin_consistent_jsonb_path(PG_FUNCTION_ARGS);
+extern Datum gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS);
  
  /* Support functions */
  extern int     compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out

index c5a7d64ae46d8d42f79b9e1f46e44ee7a8a3c336..ae7c5068119e7054b5febe7b75ac3992dd244019 100644 (file)
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -1685,9 +1685,9 @@ SELECT count(*) FROM testjsonb WHERE j = '{"pos":98, "line":371, "node":"CBA", "
       1
  (1 row)
  
---gin hash
+--gin path opclass
  DROP INDEX jidx;
-CREATE INDEX jidx ON testjsonb USING gin (j jsonb_hash_ops);
+CREATE INDEX jidx ON testjsonb USING gin (j jsonb_path_ops);
  SET enable_seqscan = off;
  SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}';
   count 
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out

index 0e3ebd161ef584f8ebd33a988ca9910b24e52549..38a95b43f8c17ed536819abb1745fc667a177463 100644 (file)
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -1685,9 +1685,9 @@ SELECT count(*) FROM testjsonb WHERE j = '{"pos":98, "line":371, "node":"CBA", "
       1
  (1 row)
  
---gin hash
+--gin path opclass
  DROP INDEX jidx;
-CREATE INDEX jidx ON testjsonb USING gin (j jsonb_hash_ops);
+CREATE INDEX jidx ON testjsonb USING gin (j jsonb_path_ops);
  SET enable_seqscan = off;
  SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}';
   count 
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql

index 3e9048911be497c64fd111981c84c62edaaee26c..7527925b2cb1ea98a73065b0fa004b5fd78c1033 100644 (file)
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -391,9 +391,9 @@ SET enable_seqscan = off;
  SELECT count(*) FROM testjsonb WHERE j > '{"p":1}';
  SELECT count(*) FROM testjsonb WHERE j = '{"pos":98, "line":371, "node":"CBA", "indexed":true}';
  
---gin hash
+--gin path opclass
  DROP INDEX jidx;
-CREATE INDEX jidx ON testjsonb USING gin (j jsonb_hash_ops);
+CREATE INDEX jidx ON testjsonb USING gin (j jsonb_path_ops);
  SET enable_seqscan = off;
  
  SELECT count(*) FROM testjsonb WHERE j @> '{"wait":null}';
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 11 May 2014 16:06:04 +0000 (12:06 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 11 May 2014 16:06:04 +0000 (12:06 -0400)
doc/src/sgml/gin.sgml		patch \| blob \| blame \| history
doc/src/sgml/json.sgml		patch \| blob \| blame \| history
src/backend/utils/adt/jsonb_gin.c		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/catalog/pg_amop.h		patch \| blob \| blame \| history
src/include/catalog/pg_opclass.h		patch \| blob \| blame \| history
src/include/catalog/pg_opfamily.h		patch \| blob \| blame \| history
src/include/catalog/pg_proc.h		patch \| blob \| blame \| history
src/include/utils/jsonb.h		patch \| blob \| blame \| history
src/test/regress/expected/jsonb.out		patch \| blob \| blame \| history
src/test/regress/expected/jsonb_1.out		patch \| blob \| blame \| history
src/test/regress/sql/jsonb.sql		patch \| blob \| blame \| history