Rethink extraction of collation dependencies.
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 17 Apr 2021 02:23:46 +0000 (22:23 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 17 Apr 2021 02:23:46 +0000 (22:23 -0400)
As it stands, find_expr_references_walker() pays attention to leaf-node
collation fields while ignoring the input collations of actual function
and operator nodes.  That seems exactly backwards from a semantic
standpoint, and it leads to reporting dependencies on collations that
really have nothing to do with the expression's behavior.

Hence, rewrite to look at function input collations instead.  This
isn't completely perfect either; it fails to account for the behavior
of record_eq and its siblings.  (The previous coding at least gave an
approximation of that, though I think it could be fooled pretty easily
into considering the columns of irrelevant composite types.)  We may
be able to improve on this later, but for now this should satisfy the
buildfarm members that didn't like ef387bed8.

In passing fix some oversights in GetTypeCollations(), and get
rid of its duplicative de-duplications.  (I'm worried that it's
still potentially O(N^2) or worse, but this makes it a little
better.)

Discussion: https://postgr.es/m/3564817.1618420687@sss.pgh.pa.us

src/backend/catalog/dependency.c
src/backend/catalog/pg_type.c
src/test/regress/expected/collate.icu.utf8.out
src/test/regress/sql/collate.icu.utf8.sql

index 8d8e926c21c79dad6e275350282173bb4821d167..41093ea6aee0d0b1a6b849ca763e570ea181911f 100644 (file)
@@ -1835,8 +1835,17 @@ recordDependencyOnSingleRelExpr(const ObjectAddress *depender,
  * the datatype.  However we do need a type dependency if there is no such
  * indirect dependency, as for example in Const and CoerceToDomain nodes.
  *
- * Similarly, we don't need to create dependencies on collations except where
- * the collation is being freshly introduced to the expression.
+ * Collations are handled primarily by recording the inputcollid's of node
+ * types that have them, as those are the ones that are semantically
+ * significant during expression evaluation.  We also record the collation of
+ * CollateExpr nodes, since those will be needed to print such nodes even if
+ * they don't really affect semantics.  Collations of leaf nodes such as Vars
+ * can be ignored on the grounds that if they're not default, they came from
+ * the referenced object (e.g., a table column), so the dependency on that
+ * object is enough.  (Note: in a post-const-folding expression tree, a
+ * CollateExpr's collation could have been absorbed into a Const or
+ * RelabelType node.  While ruleutils.c prints such collations for clarity,
+ * we may ignore them here as they have no semantic effect.)
  */
 static bool
 find_expr_references_walker(Node *node,
@@ -1876,29 +1885,6 @@ find_expr_references_walker(Node *node,
                        /* If it's a plain relation, reference this column */
                        add_object_address(OCLASS_CLASS, rte->relid, var->varattno,
                                                           context->addrs);
-
-                       /* Top-level collation if valid */
-                       if (OidIsValid(var->varcollid))
-                               add_object_address(OCLASS_COLLATION, var->varcollid, 0,
-                                                                  context->addrs);
-                       /* Otherwise, it may be a type with internal collations */
-                       else if (var->vartype >= FirstNormalObjectId)
-                       {
-                               List       *collations;
-                               ListCell   *lc;
-
-                               collations = GetTypeCollations(var->vartype);
-
-                               foreach(lc, collations)
-                               {
-                                       Oid                     coll = lfirst_oid(lc);
-
-                                       if (OidIsValid(coll))
-                                               add_object_address(OCLASS_COLLATION,
-                                                                                  lfirst_oid(lc), 0,
-                                                                                  context->addrs);
-                               }
-                       }
                }
 
                /*
@@ -1920,15 +1906,6 @@ find_expr_references_walker(Node *node,
                add_object_address(OCLASS_TYPE, con->consttype, 0,
                                                   context->addrs);
 
-               /*
-                * We must also depend on the constant's collation: it could be
-                * different from the datatype's, if a CollateExpr was const-folded to
-                * a simple constant.
-                */
-               if (OidIsValid(con->constcollid))
-                       add_object_address(OCLASS_COLLATION, con->constcollid, 0,
-                                                          context->addrs);
-
                /*
                 * If it's a regclass or similar literal referring to an existing
                 * object, add a reference to that object.  (Currently, only the
@@ -2013,10 +1990,6 @@ find_expr_references_walker(Node *node,
                /* A parameter must depend on the parameter's datatype */
                add_object_address(OCLASS_TYPE, param->paramtype, 0,
                                                   context->addrs);
-               /* and its collation, just as for Consts */
-               if (OidIsValid(param->paramcollid))
-                       add_object_address(OCLASS_COLLATION, param->paramcollid, 0,
-                                                          context->addrs);
        }
        else if (IsA(node, FuncExpr))
        {
@@ -2024,6 +1997,9 @@ find_expr_references_walker(Node *node,
 
                add_object_address(OCLASS_PROC, funcexpr->funcid, 0,
                                                   context->addrs);
+               if (OidIsValid(funcexpr->inputcollid))
+                       add_object_address(OCLASS_COLLATION, funcexpr->inputcollid, 0,
+                                                          context->addrs);
                /* fall through to examine arguments */
        }
        else if (IsA(node, OpExpr))
@@ -2032,6 +2008,9 @@ find_expr_references_walker(Node *node,
 
                add_object_address(OCLASS_OPERATOR, opexpr->opno, 0,
                                                   context->addrs);
+               if (OidIsValid(opexpr->inputcollid))
+                       add_object_address(OCLASS_COLLATION, opexpr->inputcollid, 0,
+                                                          context->addrs);
                /* fall through to examine arguments */
        }
        else if (IsA(node, DistinctExpr))
@@ -2040,6 +2019,9 @@ find_expr_references_walker(Node *node,
 
                add_object_address(OCLASS_OPERATOR, distinctexpr->opno, 0,
                                                   context->addrs);
+               if (OidIsValid(distinctexpr->inputcollid))
+                       add_object_address(OCLASS_COLLATION, distinctexpr->inputcollid, 0,
+                                                          context->addrs);
                /* fall through to examine arguments */
        }
        else if (IsA(node, NullIfExpr))
@@ -2048,6 +2030,9 @@ find_expr_references_walker(Node *node,
 
                add_object_address(OCLASS_OPERATOR, nullifexpr->opno, 0,
                                                   context->addrs);
+               if (OidIsValid(nullifexpr->inputcollid))
+                       add_object_address(OCLASS_COLLATION, nullifexpr->inputcollid, 0,
+                                                          context->addrs);
                /* fall through to examine arguments */
        }
        else if (IsA(node, ScalarArrayOpExpr))
@@ -2056,6 +2041,9 @@ find_expr_references_walker(Node *node,
 
                add_object_address(OCLASS_OPERATOR, opexpr->opno, 0,
                                                   context->addrs);
+               if (OidIsValid(opexpr->inputcollid))
+                       add_object_address(OCLASS_COLLATION, opexpr->inputcollid, 0,
+                                                          context->addrs);
                /* fall through to examine arguments */
        }
        else if (IsA(node, Aggref))
@@ -2064,6 +2052,9 @@ find_expr_references_walker(Node *node,
 
                add_object_address(OCLASS_PROC, aggref->aggfnoid, 0,
                                                   context->addrs);
+               if (OidIsValid(aggref->inputcollid))
+                       add_object_address(OCLASS_COLLATION, aggref->inputcollid, 0,
+                                                          context->addrs);
                /* fall through to examine arguments */
        }
        else if (IsA(node, WindowFunc))
@@ -2072,6 +2063,9 @@ find_expr_references_walker(Node *node,
 
                add_object_address(OCLASS_PROC, wfunc->winfnoid, 0,
                                                   context->addrs);
+               if (OidIsValid(wfunc->inputcollid))
+                       add_object_address(OCLASS_COLLATION, wfunc->inputcollid, 0,
+                                                          context->addrs);
                /* fall through to examine arguments */
        }
        else if (IsA(node, SubscriptingRef))
@@ -2116,10 +2110,6 @@ find_expr_references_walker(Node *node,
                else
                        add_object_address(OCLASS_TYPE, fselect->resulttype, 0,
                                                           context->addrs);
-               /* the collation might not be referenced anywhere else, either */
-               if (OidIsValid(fselect->resultcollid))
-                       add_object_address(OCLASS_COLLATION, fselect->resultcollid, 0,
-                                                          context->addrs);
        }
        else if (IsA(node, FieldStore))
        {
@@ -2146,10 +2136,6 @@ find_expr_references_walker(Node *node,
                /* since there is no function dependency, need to depend on type */
                add_object_address(OCLASS_TYPE, relab->resulttype, 0,
                                                   context->addrs);
-               /* the collation might not be referenced anywhere else, either */
-               if (OidIsValid(relab->resultcollid))
-                       add_object_address(OCLASS_COLLATION, relab->resultcollid, 0,
-                                                          context->addrs);
        }
        else if (IsA(node, CoerceViaIO))
        {
@@ -2158,10 +2144,6 @@ find_expr_references_walker(Node *node,
                /* since there is no exposed function, need to depend on type */
                add_object_address(OCLASS_TYPE, iocoerce->resulttype, 0,
                                                   context->addrs);
-               /* the collation might not be referenced anywhere else, either */
-               if (OidIsValid(iocoerce->resultcollid))
-                       add_object_address(OCLASS_COLLATION, iocoerce->resultcollid, 0,
-                                                          context->addrs);
        }
        else if (IsA(node, ArrayCoerceExpr))
        {
@@ -2170,10 +2152,6 @@ find_expr_references_walker(Node *node,
                /* as above, depend on type */
                add_object_address(OCLASS_TYPE, acoerce->resulttype, 0,
                                                   context->addrs);
-               /* the collation might not be referenced anywhere else, either */
-               if (OidIsValid(acoerce->resultcollid))
-                       add_object_address(OCLASS_COLLATION, acoerce->resultcollid, 0,
-                                                          context->addrs);
                /* fall through to examine arguments */
        }
        else if (IsA(node, ConvertRowtypeExpr))
@@ -2213,6 +2191,24 @@ find_expr_references_walker(Node *node,
                        add_object_address(OCLASS_OPFAMILY, lfirst_oid(l), 0,
                                                           context->addrs);
                }
+               foreach(l, rcexpr->inputcollids)
+               {
+                       Oid                     inputcollid = lfirst_oid(l);
+
+                       if (OidIsValid(inputcollid))
+                               add_object_address(OCLASS_COLLATION, inputcollid, 0,
+                                                                  context->addrs);
+               }
+               /* fall through to examine arguments */
+       }
+       else if (IsA(node, MinMaxExpr))
+       {
+               MinMaxExpr *mmexpr = (MinMaxExpr *) node;
+
+               /* minmaxtype will match one of the inputs, so no need to record it */
+               if (OidIsValid(mmexpr->inputcollid))
+                       add_object_address(OCLASS_COLLATION, mmexpr->inputcollid, 0,
+                                                          context->addrs);
                /* fall through to examine arguments */
        }
        else if (IsA(node, CoerceToDomain))
index ec5d1224323cff438983ecef89e23a87cb5dcf11..64f6c7238f7c5eb910573e7cffbd06a4a309ad91 100644 (file)
@@ -535,8 +535,12 @@ GetTypeCollations(Oid typeoid)
                elog(ERROR, "cache lookup failed for type %u", typeoid);
        typeTup = (Form_pg_type) GETSTRUCT(tuple);
 
+       /*
+        * If the type has a typcollation attribute, report that and we're done.
+        * Otherwise, it could be a container type that we should recurse into.
+        */
        if (OidIsValid(typeTup->typcollation))
-               result = list_append_unique_oid(result, typeTup->typcollation);
+               result = list_make1_oid(typeTup->typcollation);
        else if (typeTup->typtype == TYPTYPE_COMPOSITE)
        {
                Relation        rel = relation_open(typeTup->typrelid, AccessShareLock);
@@ -546,6 +550,8 @@ GetTypeCollations(Oid typeoid)
                {
                        Form_pg_attribute att = TupleDescAttr(desc, i);
 
+                       if (att->attisdropped)
+                               continue;
                        if (OidIsValid(att->attcollation))
                                result = list_append_unique_oid(result, att->attcollation);
                        else
@@ -558,21 +564,24 @@ GetTypeCollations(Oid typeoid)
        else if (typeTup->typtype == TYPTYPE_DOMAIN)
        {
                Assert(OidIsValid(typeTup->typbasetype));
-
-               result = list_concat_unique_oid(result,
-                                                                               GetTypeCollations(typeTup->typbasetype));
+               result = GetTypeCollations(typeTup->typbasetype);
        }
        else if (typeTup->typtype == TYPTYPE_RANGE)
        {
-               Oid                     rangeid = get_range_subtype(typeTup->oid);
+               Oid                     rangecoll = get_range_collation(typeTup->oid);
 
-               Assert(OidIsValid(rangeid));
+               if (OidIsValid(rangecoll))
+                       result = list_make1_oid(rangecoll);
+               else
+               {
+                       Oid                     rangeid = get_range_subtype(typeTup->oid);
 
-               result = list_concat_unique_oid(result, GetTypeCollations(rangeid));
+                       Assert(OidIsValid(rangeid));
+                       result = GetTypeCollations(rangeid);
+               }
        }
-       else if (OidIsValid(typeTup->typelem))
-               result = list_concat_unique_oid(result,
-                                                                               GetTypeCollations(typeTup->typelem));
+       else if (IsTrueArrayType(typeTup))
+               result = GetTypeCollations(typeTup->typelem);
 
        ReleaseSysCache(tuple);
 
index 779405ef320eeac65c0b52f09c82bec903d8c7ac..faf99f76b540b0a71a936643eee20053e905425c 100644 (file)
@@ -1958,7 +1958,7 @@ CREATE DOMAIN d_custom AS t_custom;
 CREATE COLLATION custom (
     LOCALE = 'fr-x-icu', PROVIDER = 'icu'
 );
-CREATE TYPE myrange AS range (subtype = text);
+CREATE TYPE myrange AS range (subtype = text, collation = "POSIX");
 CREATE TYPE myrange_en_fr_ga AS range(subtype = t_en_fr_ga);
 CREATE TABLE collate_test (
     id integer,
@@ -2054,36 +2054,17 @@ ORDER BY 1, 2, 3;
  icuidx05_d_en_fr_ga_arr           | "en-x-icu" | up to date
  icuidx05_d_en_fr_ga_arr           | "fr-x-icu" | up to date
  icuidx05_d_en_fr_ga_arr           | "ga-x-icu" | up to date
- icuidx06_d_en_fr_ga               | "default"  | XXX
- icuidx06_d_en_fr_ga               | "en-x-icu" | up to date
  icuidx06_d_en_fr_ga               | "fr-x-icu" | up to date
- icuidx06_d_en_fr_ga               | "ga-x-icu" | up to date
- icuidx07_d_en_fr_ga               | "default"  | XXX
- icuidx07_d_en_fr_ga               | "en-x-icu" | up to date
- icuidx07_d_en_fr_ga               | "fr-x-icu" | up to date
  icuidx07_d_en_fr_ga               | "ga-x-icu" | up to date
- icuidx08_d_en_fr_ga               | "en-x-icu" | up to date
- icuidx08_d_en_fr_ga               | "fr-x-icu" | up to date
- icuidx08_d_en_fr_ga               | "ga-x-icu" | up to date
- icuidx09_d_en_fr_ga               | "en-x-icu" | up to date
- icuidx09_d_en_fr_ga               | "fr-x-icu" | up to date
- icuidx09_d_en_fr_ga               | "ga-x-icu" | up to date
- icuidx10_d_en_fr_ga_es            | "en-x-icu" | up to date
  icuidx10_d_en_fr_ga_es            | "es-x-icu" | up to date
- icuidx10_d_en_fr_ga_es            | "fr-x-icu" | up to date
- icuidx10_d_en_fr_ga_es            | "ga-x-icu" | up to date
- icuidx11_d_es                     | "default"  | XXX
  icuidx11_d_es                     | "es-x-icu" | up to date
- icuidx12_custom                   | "default"  | XXX
  icuidx12_custom                   | custom     | up to date
- icuidx13_custom                   | "default"  | XXX
  icuidx13_custom                   | custom     | up to date
- icuidx14_myrange                  | "default"  | XXX
  icuidx15_myrange_en_fr_ga         | "en-x-icu" | up to date
  icuidx15_myrange_en_fr_ga         | "fr-x-icu" | up to date
  icuidx15_myrange_en_fr_ga         | "ga-x-icu" | up to date
  icuidx17_part                     | "en-x-icu" | up to date
-(57 rows)
+(38 rows)
 
 -- Validate that REINDEX will update the stored version.
 UPDATE pg_depend SET refobjversion = 'not a version'
index 7f40c560398eade106171e6b860a576ec555dade..4c71f4d249e5f54ce08f175a346d64875bc8ade7 100644 (file)
@@ -755,7 +755,7 @@ CREATE COLLATION custom (
     LOCALE = 'fr-x-icu', PROVIDER = 'icu'
 );
 
-CREATE TYPE myrange AS range (subtype = text);
+CREATE TYPE myrange AS range (subtype = text, collation = "POSIX");
 CREATE TYPE myrange_en_fr_ga AS range(subtype = t_en_fr_ga);
 
 CREATE TABLE collate_test (