pg_upgrade: run all data type checks per connection
author	Daniel Gustafsson <dgustafsson@postgresql.org>
Tue, 19 Mar 2024 12:32:50 +0000 (13:32 +0100)
committer	Daniel Gustafsson <dgustafsson@postgresql.org>
Tue, 19 Mar 2024 12:32:50 +0000 (13:32 +0100)
The checks for data type usage were each connecting to every database
in the cluster and running their query. On clusters with many databases
this becomes unnecessarily expensive. This moves the checks to run over
a single connection per database instead, minimizing connection setup
and teardown overhead.

Reviewed-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/BB4C76F-D416-4F9F-949E-DBE950D37787@yesql.se

src/bin/pg_upgrade/check.c
src/bin/pg_upgrade/pg_upgrade.h
src/bin/pg_upgrade/version.c
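
Schematically, the change inverts the loop nesting: instead of opening a new
connection for every (check, database) pair, each per-database connection is
now reused for all checks. A minimal, self-contained sketch of the idea (the
names below are illustrative stand-ins, not the actual pg_upgrade symbols):

    /* loop_inversion_sketch.c -- hypothetical illustration, not pg_upgrade code */
    #include <stdio.h>

    #define N_DBS    3
    #define N_CHECKS 4

    /* stand-ins for connectToServer(), running one check's query, PQfinish() */
    static int  connect_db(int db)   { printf("connect to db %d\n", db); return db; }
    static void run_check(int conn, int check) { printf("  check %d on conn %d\n", check, conn); }
    static void disconnect(int conn) { printf("disconnect conn %d\n", conn); }

    int
    main(void)
    {
        /*
         * Before this commit each check looped over all databases itself,
         * costing N_CHECKS * N_DBS connection setups; with the loops
         * inverted only N_DBS connections are opened, each reused for
         * every check.
         */
        for (int db = 0; db < N_DBS; db++)
        {
            int conn = connect_db(db);

            for (int check = 0; check < N_CHECKS; check++)
                run_check(conn, check);

            disconnect(conn);
        }
        return 0;
    }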

index 8ce6c674e321f17bb1ca02828bf91d29beafcf30..c198896c9f0fba7df38285e16edabd80de8f1a00 100644 (file)
@@ -10,6 +10,7 @@
 #include "postgres_fe.h"
 
 #include "catalog/pg_authid_d.h"
+#include "catalog/pg_class_d.h"
 #include "catalog/pg_collation.h"
 #include "fe_utils/string_utils.h"
 #include "mb/pg_wchar.h"
@@ -23,13 +24,6 @@ static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
 static void check_for_user_defined_postfix_ops(ClusterInfo *cluster);
 static void check_for_incompatible_polymorphics(ClusterInfo *cluster);
 static void check_for_tables_with_oids(ClusterInfo *cluster);
-static void check_for_composite_data_type_usage(ClusterInfo *cluster);
-static void check_for_reg_data_type_usage(ClusterInfo *cluster);
-static void check_for_aclitem_data_type_usage(ClusterInfo *cluster);
-static void check_for_removed_data_type_usage(ClusterInfo *cluster,
-                                             const char *version,
-                                             const char *datatype);
-static void check_for_jsonb_9_4_usage(ClusterInfo *cluster);
 static void check_for_pg_role_prefix(ClusterInfo *cluster);
 static void check_for_new_tablespace_dir(void);
 static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
@@ -38,6 +32,497 @@ static void check_new_cluster_subscription_configuration(void);
 static void check_old_cluster_for_valid_slots(bool live_check);
 static void check_old_cluster_subscription_state(void);
 
+/*
+ * DataTypesUsageChecks - definitions of data type checks for the old cluster
+ * in order to determine if an upgrade can be performed.  See the comment on
+ * data_types_usage_checks below for a more detailed description.
+ */
+typedef struct
+{
+   /* Status line to print to the user */
+   const char *status;
+   /* Filename to store report to */
+   const char *report_filename;
+   /* Query to extract the oid of the datatype */
+   const char *base_query;
+   /* Text to store to report in case of error */
+   const char *report_text;
+   /* The latest version where the check applies */
+   int         threshold_version;
+   /* A function pointer for determining if the check applies */
+   DataTypesUsageVersionCheck version_hook;
+}          DataTypesUsageChecks;
+
+/*
+ * Special values for threshold_version for indicating that a check applies to
+ * all versions, or that a custom function needs to be invoked to determine
+ * if the check applies.
+ */
+#define MANUAL_CHECK 1
+#define ALL_VERSIONS -1
+
+/*--
+ * Data type usage checks. Each check for problematic data type usage is
+ * defined in this array with metadata, SQL query for finding the data type
+ * and functionality for deciding if the check is applicable to the version
+ * of the old cluster. The struct members are described in detail below:
+ *
+ * status              A one-line string which can be printed to the user to
+ *                     inform about progress. Should not end with a newline.
+ * report_filename     The filename in which the list of problems detected by
+ *                     the check will be printed.
+ * base_query          A query which extracts the Oid of the datatype checked
+ *                     for.
+ * report_text         The text which will be printed to the user to explain
+ *                     what the check did, and why it failed. The text should
+ *                     end with a newline, and does not need to refer to the
+ *                     report_filename as that is automatically appended to
+ *                     the report with the path to the log folder.
+ * threshold_version   The major version of PostgreSQL for which to run the
+ *                     check. Iff the old cluster is less than, or equal to,
+ *                     the threshold version then the check will be executed.
+ *                     If the old version is greater than the threshold then
+ *                     the check is skipped. If the threshold_version is set
+ *                     to ALL_VERSIONS then it will be run unconditionally,
+ *                     if set to MANUAL_CHECK then the version_hook function
+ *                     will be executed in order to determine whether or not
+ *                     to run.
+ * version_hook            A function pointer to a version check function of type
+ *                     DataTypesUsageVersionCheck which is used to determine
+ *                     if the check is applicable to the old cluster. If the
+ *                     version_hook returns true then the check will be run,
+ *                     else it will be skipped. The function will only be
+ *                     executed if threshold_version is set to MANUAL_CHECK.
+ */
+static DataTypesUsageChecks data_types_usage_checks[] =
+{
+   /*
+    * Look for composite types that were made during initdb *or* belong to
+    * information_schema; that's important in case information_schema was
+    * dropped and reloaded.
+    *
+    * The cutoff OID here should match the source cluster's value of
+    * FirstNormalObjectId.  We hardcode it rather than using that C #define
+    * because, if that #define is ever changed, our own version's value is
+    * NOT what to use.  Eventually we may need a test on the source cluster's
+    * version to select the correct value.
+    */
+   {
+       .status = gettext_noop("Checking for system-defined composite types in user tables"),
+           .report_filename = "tables_using_composite.txt",
+           .base_query =
+           "SELECT t.oid FROM pg_catalog.pg_type t "
+           "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid "
+           " WHERE typtype = 'c' AND (t.oid < 16384 OR nspname = 'information_schema')",
+           .report_text =
+           gettext_noop("Your installation contains system-defined composite types in user tables.\n"
+                        "These type OIDs are not stable across PostgreSQL versions,\n"
+                        "so this cluster cannot currently be upgraded.  You can drop the\n"
+                        "problem columns and restart the upgrade.\n"),
+           .threshold_version = ALL_VERSIONS
+   },
+
+   /*
+    * 9.3 -> 9.4 Fully implement the 'line' data type in 9.4, which
+    * previously returned "not enabled" by default and was only functionally
+    * enabled with a compile-time switch; as of 9.4 "line" has a different
+    * on-disk representation format.
+    */
+   {
+       .status = gettext_noop("Checking for incompatible \"line\" data type"),
+           .report_filename = "tables_using_line.txt",
+           .base_query =
+           "SELECT 'pg_catalog.line'::pg_catalog.regtype AS oid",
+           .report_text =
+           gettext_noop("Your installation contains the \"line\" data type in user tables.\n"
+                        "This data type changed its internal and input/output format\n"
+                        "between your old and new versions so this\n"
+                        "cluster cannot currently be upgraded.  You can\n"
+                        "drop the problem columns and restart the upgrade.\n"),
+           .threshold_version = 903
+   },
+
+   /*
+    * pg_upgrade only preserves these system values: pg_class.oid,
+    * pg_type.oid and pg_enum.oid.
+    *
+    * Many of the reg* data types reference system catalog info that is not
+    * preserved, and hence these data types cannot be used in user tables
+    * upgraded by pg_upgrade.
+    */
+   {
+       .status = gettext_noop("Checking for reg* data types in user tables"),
+           .report_filename = "tables_using_reg.txt",
+
+       /*
+        * Note: older servers will not have all of these reg* types, so we
+        * have to write the query like this rather than depending on casts to
+        * regtype.
+        */
+           .base_query =
+           "SELECT oid FROM pg_catalog.pg_type t "
+           "WHERE t.typnamespace = "
+           "        (SELECT oid FROM pg_catalog.pg_namespace "
+           "         WHERE nspname = 'pg_catalog') "
+           "  AND t.typname IN ( "
+       /* pg_class.oid is preserved, so 'regclass' is OK */
+           "           'regcollation', "
+           "           'regconfig', "
+           "           'regdictionary', "
+           "           'regnamespace', "
+           "           'regoper', "
+           "           'regoperator', "
+           "           'regproc', "
+           "           'regprocedure' "
+       /* pg_authid.oid is preserved, so 'regrole' is OK */
+       /* pg_type.oid is (mostly) preserved, so 'regtype' is OK */
+           "         )",
+           .report_text =
+           gettext_noop("Your installation contains one of the reg* data types in user tables.\n"
+                        "These data types reference system OIDs that are not preserved by\n"
+                        "pg_upgrade, so this cluster cannot currently be upgraded.  You can\n"
+                        "drop the problem columns and restart the upgrade.\n"),
+           .threshold_version = ALL_VERSIONS
+   },
+
+   /*
+    * PG 16 increased the size of the 'aclitem' type, which breaks the
+    * on-disk format for existing data.
+    */
+   {
+       .status = gettext_noop("Checking for incompatible \"aclitem\" data type"),
+           .report_filename = "tables_using_aclitem.txt",
+           .base_query =
+           "SELECT 'pg_catalog.aclitem'::pg_catalog.regtype AS oid",
+           .report_text =
+           gettext_noop("Your installation contains the \"aclitem\" data type in user tables.\n"
+                        "The internal format of \"aclitem\" changed in PostgreSQL version 16\n"
+                        "so this cluster cannot currently be upgraded.  You can drop the\n"
+                        "problem columns and restart the upgrade.\n"),
+           .threshold_version = 1500
+   },
+
+   /*
+    * It's no longer allowed to create tables or views with "unknown"-type
+    * columns.  We do not complain about views with such columns, because
+    * they should get silently converted to "text" columns during the DDL
+    * dump and reload; it seems unlikely to be worth making users do that by
+    * hand.  However, if there's a table with such a column, the DDL reload
+    * will fail, so we should pre-detect that rather than failing
+    * mid-upgrade.  Worse, if there's a matview with such a column, the DDL
+    * reload will silently change it to "text" which won't match the on-disk
+    * storage (which is like "cstring").  So we *must* reject that.
+    */
+   {
+       .status = gettext_noop("Checking for invalid \"unknown\" user columns"),
+           .report_filename = "tables_using_unknown.txt",
+           .base_query =
+           "SELECT 'pg_catalog.unknown'::pg_catalog.regtype AS oid",
+           .report_text =
+           gettext_noop("Your installation contains the \"unknown\" data type in user tables.\n"
+                        "This data type is no longer allowed in tables, so this cluster\n"
+                        "cannot currently be upgraded.  You can drop the problem columns\n"
+                        "and restart the upgrade.\n"),
+           .threshold_version = 906
+   },
+
+   /*
+    * PG 12 changed the 'sql_identifier' type storage to be based on name
+    * rather than varchar, which breaks the on-disk format for existing data
+    * (name is a fixed-length by-reference type, while varchar is varlena).
+    * User tables with sql_identifier columns would therefore be broken
+    * after an upgrade, so we need to prevent upgrading when the type is
+    * used in user objects (tables, indexes, ...) and report the problem
+    * columns to the user instead.
+    */
+   {
+       .status = gettext_noop("Checking for invalid \"sql_identifier\" user columns"),
+           .report_filename = "tables_using_sql_identifier.txt",
+           .base_query =
+           "SELECT 'information_schema.sql_identifier'::pg_catalog.regtype AS oid",
+           .report_text =
+           gettext_noop("Your installation contains the \"sql_identifier\" data type in user tables.\n"
+                        "The on-disk format for this data type has changed, so this\n"
+                        "cluster cannot currently be upgraded.  You can drop the problem\n"
+                        "columns and restart the upgrade.\n"),
+           .threshold_version = 1100
+   },
+
+   /*
+    * JSONB changed its storage format during 9.4 beta, so check for it.
+    */
+   {
+       .status = gettext_noop("Checking for incompatible \"jsonb\" data type in user tables"),
+           .report_filename = "tables_using_jsonb.txt",
+           .base_query =
+           "SELECT 'pg_catalog.jsonb'::pg_catalog.regtype AS oid",
+           .report_text =
+           gettext_noop("Your installation contains the \"jsonb\" data type in user tables.\n"
+                        "The internal format of \"jsonb\" changed during 9.4 beta so this\n"
+                        "cluster cannot currently be upgraded.  You can drop the problem \n"
+                        "columns and restart the upgrade.\n"),
+           .threshold_version = MANUAL_CHECK,
+           .version_hook = jsonb_9_4_check_applicable
+   },
+
+   /*
+    * PG 12 removed types abstime, reltime, tinterval.
+    */
+   {
+       .status = gettext_noop("Checking for removed \"abstime\" data type in user tables"),
+           .report_filename = "tables_using_abstime.txt",
+           .base_query =
+           "SELECT 'pg_catalog.abstime'::pg_catalog.regtype AS oid",
+           .report_text =
+           gettext_noop("Your installation contains the \"abstime\" data type in user tables.\n"
+                        "The \"abstime\" type has been removed in PostgreSQL version 12,\n"
+                        "so this cluster cannot currently be upgraded.  You can drop the\n"
+                        "problem columns, or change them to another data type, and restart\n"
+                        "the upgrade.\n"),
+           .threshold_version = 1100
+   },
+   {
+       .status = gettext_noop("Checking for removed \"reltime\" data type in user tables"),
+           .report_filename = "tables_using_reltime.txt",
+           .base_query =
+           "SELECT 'pg_catalog.reltime'::pg_catalog.regtype AS oid",
+           .report_text =
+           gettext_noop("Your installation contains the \"reltime\" data type in user tables.\n"
+                        "The \"reltime\" type has been removed in PostgreSQL version 12,\n"
+                        "so this cluster cannot currently be upgraded.  You can drop the\n"
+                        "problem columns, or change them to another data type, and restart\n"
+                        "the upgrade.\n"),
+           .threshold_version = 1100
+   },
+   {
+       .status = gettext_noop("Checking for removed \"tinterval\" data type in user tables"),
+           .report_filename = "tables_using_tinterval.txt",
+           .base_query =
+           "SELECT 'pg_catalog.tinterval'::pg_catalog.regtype AS oid",
+           .report_text =
+           gettext_noop("Your installation contains the \"tinterval\" data type in user tables.\n"
+                        "The \"tinterval\" type has been removed in PostgreSQL version 12,\n"
+                        "so this cluster cannot currently be upgraded.  You can drop the\n"
+                        "problem columns, or change them to another data type, and restart\n"
+                        "the upgrade.\n"),
+           .threshold_version = 1100
+   },
+
+   /* End of checks marker, must remain last */
+   {
+       NULL, NULL, NULL, NULL, 0, NULL
+   }
+};
+
+/*
+ * check_for_data_types_usage()
+ * Detect whether there are any stored columns depending on given type(s)
+ *
+ * If so, write a report to the given file name and signal a failure to the
+ * user.
+ *
+ * The checks to run are defined in a DataTypesUsageChecks structure where
+ * each check has metadata for explaining errors to the user, a base_query,
+ * a report filename and a function pointer hook for validating if the check
+ * should be executed given the cluster at hand.
+ *
+ * base_query should be a SELECT yielding a single column named "oid",
+ * containing the pg_type OIDs of one or more types that are known to have
+ * inconsistent on-disk representations across server versions.
+ *
+ * We check for the type(s) in tables, matviews, and indexes, but not views;
+ * there's no storage involved in a view.
+ */
+static void
+check_for_data_types_usage(ClusterInfo *cluster, DataTypesUsageChecks * checks)
+{
+   bool        found = false;
+   bool       *results;
+   PQExpBufferData report;
+   DataTypesUsageChecks *tmp = checks;
+   int         n_data_types_usage_checks = 0;
+
+   prep_status("Checking for data type usage");
+
+   /* Gather number of checks to perform */
+   while (tmp->status != NULL)
+   {
+       n_data_types_usage_checks++;
+       tmp++;
+   }
+
+   /* Prepare an array to store the results of checks in */
+   results = pg_malloc0(sizeof(bool) * n_data_types_usage_checks);
+
+   /*
+    * Connect to each database in the cluster and run all defined checks
+    * against that database before trying the next one.
+    */
+   for (int dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+   {
+       DbInfo     *active_db = &cluster->dbarr.dbs[dbnum];
+       PGconn     *conn = connectToServer(cluster, active_db->db_name);
+
+       for (int checknum = 0; checknum < n_data_types_usage_checks; checknum++)
+       {
+           PGresult   *res;
+           int         ntups;
+           int         i_nspname;
+           int         i_relname;
+           int         i_attname;
+           FILE       *script = NULL;
+           bool        db_used = false;
+           char        output_path[MAXPGPATH];
+           DataTypesUsageChecks *cur_check = &checks[checknum];
+
+           if (cur_check->threshold_version == MANUAL_CHECK)
+           {
+               Assert(cur_check->version_hook);
+
+               /*
+                * Make sure that the check applies to the current cluster
+                * version and skip if not. If no check hook has been defined
+                * we run the check for all versions.
+                */
+               if (!cur_check->version_hook(cluster))
+                   continue;
+           }
+           else if (cur_check->threshold_version != ALL_VERSIONS)
+           {
+               if (GET_MAJOR_VERSION(cluster->major_version) > cur_check->threshold_version)
+                   continue;
+           }
+           else
+               Assert(cur_check->threshold_version == ALL_VERSIONS);
+
+           snprintf(output_path, sizeof(output_path), "%s/%s",
+                    log_opts.basedir,
+                    cur_check->report_filename);
+
+           /*
+            * The type(s) of interest might be wrapped in a domain, array,
+            * composite, or range, and these container types can be nested
+            * (to varying extents depending on server version, but that's not
+            * of concern here).  To handle all these cases we need a
+            * recursive CTE.
+            */
+           res = executeQueryOrDie(conn,
+                                   "WITH RECURSIVE oids AS ( "
+           /* start with the type(s) returned by base_query */
+                                   "   %s "
+                                   "   UNION ALL "
+                                   "   SELECT * FROM ( "
+           /* inner WITH because we can only reference the CTE once */
+                                   "       WITH x AS (SELECT oid FROM oids) "
+           /* domains on any type selected so far */
+                                   "           SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' "
+                                   "           UNION ALL "
+           /* arrays over any type selected so far */
+                                   "           SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' "
+                                   "           UNION ALL "
+           /* composite types containing any type selected so far */
+                                   "           SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x "
+                                   "           WHERE t.typtype = 'c' AND "
+                                   "                 t.oid = c.reltype AND "
+                                   "                 c.oid = a.attrelid AND "
+                                   "                 NOT a.attisdropped AND "
+                                   "                 a.atttypid = x.oid "
+                                   "           UNION ALL "
+           /* ranges containing any type selected so far */
+                                   "           SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x "
+                                   "           WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid"
+                                   "   ) foo "
+                                   ") "
+           /* now look for stored columns of any such type */
+                                   "SELECT n.nspname, c.relname, a.attname "
+                                   "FROM   pg_catalog.pg_class c, "
+                                   "       pg_catalog.pg_namespace n, "
+                                   "       pg_catalog.pg_attribute a "
+                                   "WHERE  c.oid = a.attrelid AND "
+                                   "       NOT a.attisdropped AND "
+                                   "       a.atttypid IN (SELECT oid FROM oids) AND "
+                                   "       c.relkind IN ("
+                                   CppAsString2(RELKIND_RELATION) ", "
+                                   CppAsString2(RELKIND_MATVIEW) ", "
+                                   CppAsString2(RELKIND_INDEX) ") AND "
+                                   "       c.relnamespace = n.oid AND "
+           /* exclude possible orphaned temp tables */
+                                   "       n.nspname !~ '^pg_temp_' AND "
+                                   "       n.nspname !~ '^pg_toast_temp_' AND "
+           /* exclude system catalogs, too */
+                                   "       n.nspname NOT IN ('pg_catalog', 'information_schema')",
+                                   cur_check->base_query);
+
+           ntups = PQntuples(res);
+
+           /*
+            * The datatype was found, so extract the data and log to the
+            * requested filename. We need to open the file for appending
+            * since the check might have already found the type in another
+            * database earlier in the loop.
+            */
+           if (ntups)
+           {
+               /*
+                * Make sure we have a buffer to save reports to now that we
+                * found a first failing check.
+                */
+               if (!found)
+                   initPQExpBuffer(&report);
+               found = true;
+
+               /*
+                * If this is the first time we see an error for the check in
+                * question then print a status message of the failure.
+                */
+               if (!results[checknum])
+               {
+                   pg_log(PG_REPORT, "    failed check: %s", _(cur_check->status));
+                   appendPQExpBuffer(&report, "\n%s\n%s    %s\n",
+                                     _(cur_check->report_text),
+                                     _("A list of the problem columns is in the file:"),
+                                     output_path);
+               }
+               results[checknum] = true;
+
+               i_nspname = PQfnumber(res, "nspname");
+               i_relname = PQfnumber(res, "relname");
+               i_attname = PQfnumber(res, "attname");
+
+               for (int rowno = 0; rowno < ntups; rowno++)
+               {
+                   if (script == NULL && (script = fopen_priv(output_path, "a")) == NULL)
+                       pg_fatal("could not open file \"%s\": %m", output_path);
+
+                   if (!db_used)
+                   {
+                       fprintf(script, "In database: %s\n", active_db->db_name);
+                       db_used = true;
+                   }
+                   fprintf(script, "  %s.%s.%s\n",
+                           PQgetvalue(res, rowno, i_nspname),
+                           PQgetvalue(res, rowno, i_relname),
+                           PQgetvalue(res, rowno, i_attname));
+               }
+
+               if (script)
+               {
+                   fclose(script);
+                   script = NULL;
+               }
+           }
+
+           PQclear(res);
+       }
+
+       PQfinish(conn);
+   }
+
+   if (found)
+       pg_fatal("Data type checks failed: %s", report.data);
+
+   check_ok();
+}
 
 /*
  * fix_path_separator
@@ -110,8 +595,6 @@ check_and_dump_old_cluster(bool live_check)
    check_is_install_user(&old_cluster);
    check_proper_datallowconn(&old_cluster);
    check_for_prepared_transactions(&old_cluster);
-   check_for_composite_data_type_usage(&old_cluster);
-   check_for_reg_data_type_usage(&old_cluster);
    check_for_isn_and_int8_passing_mismatch(&old_cluster);
 
    if (GET_MAJOR_VERSION(old_cluster.major_version) >= 1700)
@@ -129,22 +612,7 @@ check_and_dump_old_cluster(bool live_check)
        check_old_cluster_subscription_state();
    }
 
-   /*
-    * PG 16 increased the size of the 'aclitem' type, which breaks the
-    * on-disk format for existing data.
-    */
-   if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1500)
-       check_for_aclitem_data_type_usage(&old_cluster);
-
-   /*
-    * PG 12 removed types abstime, reltime, tinterval.
-    */
-   if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
-   {
-       check_for_removed_data_type_usage(&old_cluster, "12", "abstime");
-       check_for_removed_data_type_usage(&old_cluster, "12", "reltime");
-       check_for_removed_data_type_usage(&old_cluster, "12", "tinterval");
-   }
+   check_for_data_types_usage(&old_cluster, data_types_usage_checks);
 
    /*
     * PG 14 changed the function signature of encoding conversion functions.
@@ -176,21 +644,12 @@ check_and_dump_old_cluster(bool live_check)
    if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
        check_for_tables_with_oids(&old_cluster);
 
-   /*
-    * PG 12 changed the 'sql_identifier' type storage to be based on name,
-    * not varchar, which breaks on-disk format for existing data. So we need
-    * to prevent upgrade when used in user objects (tables, indexes, ...).
-    */
-   if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1100)
-       old_11_check_for_sql_identifier_data_type_usage(&old_cluster);
-
    /*
     * Pre-PG 10 allowed tables with 'unknown' type columns and non WAL logged
     * hash indexes
     */
    if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906)
    {
-       old_9_6_check_for_unknown_data_type_usage(&old_cluster);
        if (user_opts.check)
            old_9_6_invalidate_hash_indexes(&old_cluster, true);
    }
@@ -199,14 +658,6 @@ check_and_dump_old_cluster(bool live_check)
    if (GET_MAJOR_VERSION(old_cluster.major_version) <= 905)
        check_for_pg_role_prefix(&old_cluster);
 
-   if (GET_MAJOR_VERSION(old_cluster.major_version) == 904 &&
-       old_cluster.controldata.cat_ver < JSONB_FORMAT_CHANGE_CAT_VER)
-       check_for_jsonb_9_4_usage(&old_cluster);
-
-   /* Pre-PG 9.4 had a different 'line' data type internal format */
-   if (GET_MAJOR_VERSION(old_cluster.major_version) <= 903)
-       old_9_3_check_for_line_data_type_usage(&old_cluster);
-
    /*
     * While not a check option, we do this now because this is the only time
     * the old server is running.
@@ -1122,220 +1573,6 @@ check_for_tables_with_oids(ClusterInfo *cluster)
 }
 
 
-/*
- * check_for_composite_data_type_usage()
- * Check for system-defined composite types used in user tables.
- *
- * The OIDs of rowtypes of system catalogs and information_schema views
- * can change across major versions; unlike user-defined types, we have
- * no mechanism for forcing them to be the same in the new cluster.
- * Hence, if any user table uses one, that's problematic for pg_upgrade.
- */
-static void
-check_for_composite_data_type_usage(ClusterInfo *cluster)
-{
-   bool        found;
-   Oid         firstUserOid;
-   char        output_path[MAXPGPATH];
-   char       *base_query;
-
-   prep_status("Checking for system-defined composite types in user tables");
-
-   snprintf(output_path, sizeof(output_path), "%s/%s",
-            log_opts.basedir,
-            "tables_using_composite.txt");
-
-   /*
-    * Look for composite types that were made during initdb *or* belong to
-    * information_schema; that's important in case information_schema was
-    * dropped and reloaded.
-    *
-    * The cutoff OID here should match the source cluster's value of
-    * FirstNormalObjectId.  We hardcode it rather than using that C #define
-    * because, if that #define is ever changed, our own version's value is
-    * NOT what to use.  Eventually we may need a test on the source cluster's
-    * version to select the correct value.
-    */
-   firstUserOid = 16384;
-
-   base_query = psprintf("SELECT t.oid FROM pg_catalog.pg_type t "
-                         "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid "
-                         " WHERE typtype = 'c' AND (t.oid < %u OR nspname = 'information_schema')",
-                         firstUserOid);
-
-   found = check_for_data_types_usage(cluster, base_query, output_path);
-
-   free(base_query);
-
-   if (found)
-   {
-       pg_log(PG_REPORT, "fatal");
-       pg_fatal("Your installation contains system-defined composite types in user tables.\n"
-                "These type OIDs are not stable across PostgreSQL versions,\n"
-                "so this cluster cannot currently be upgraded.  You can\n"
-                "drop the problem columns and restart the upgrade.\n"
-                "A list of the problem columns is in the file:\n"
-                "    %s", output_path);
-   }
-   else
-       check_ok();
-}
-
-/*
- * check_for_reg_data_type_usage()
- * pg_upgrade only preserves these system values:
- *     pg_class.oid
- *     pg_type.oid
- *     pg_enum.oid
- *
- * Many of the reg* data types reference system catalog info that is
- * not preserved, and hence these data types cannot be used in user
- * tables upgraded by pg_upgrade.
- */
-static void
-check_for_reg_data_type_usage(ClusterInfo *cluster)
-{
-   bool        found;
-   char        output_path[MAXPGPATH];
-
-   prep_status("Checking for reg* data types in user tables");
-
-   snprintf(output_path, sizeof(output_path), "%s/%s",
-            log_opts.basedir,
-            "tables_using_reg.txt");
-
-   /*
-    * Note: older servers will not have all of these reg* types, so we have
-    * to write the query like this rather than depending on casts to regtype.
-    */
-   found = check_for_data_types_usage(cluster,
-                                      "SELECT oid FROM pg_catalog.pg_type t "
-                                      "WHERE t.typnamespace = "
-                                      "        (SELECT oid FROM pg_catalog.pg_namespace "
-                                      "         WHERE nspname = 'pg_catalog') "
-                                      "  AND t.typname IN ( "
-   /* pg_class.oid is preserved, so 'regclass' is OK */
-                                      "           'regcollation', "
-                                      "           'regconfig', "
-                                      "           'regdictionary', "
-                                      "           'regnamespace', "
-                                      "           'regoper', "
-                                      "           'regoperator', "
-                                      "           'regproc', "
-                                      "           'regprocedure' "
-   /* pg_authid.oid is preserved, so 'regrole' is OK */
-   /* pg_type.oid is (mostly) preserved, so 'regtype' is OK */
-                                      "         )",
-                                      output_path);
-
-   if (found)
-   {
-       pg_log(PG_REPORT, "fatal");
-       pg_fatal("Your installation contains one of the reg* data types in user tables.\n"
-                "These data types reference system OIDs that are not preserved by\n"
-                "pg_upgrade, so this cluster cannot currently be upgraded.  You can\n"
-                "drop the problem columns and restart the upgrade.\n"
-                "A list of the problem columns is in the file:\n"
-                "    %s", output_path);
-   }
-   else
-       check_ok();
-}
-
-/*
- * check_for_aclitem_data_type_usage
- *
- * aclitem changed its storage format in 16, so check for it.
- */
-static void
-check_for_aclitem_data_type_usage(ClusterInfo *cluster)
-{
-   char        output_path[MAXPGPATH];
-
-   prep_status("Checking for incompatible \"%s\" data type in user tables",
-               "aclitem");
-
-   snprintf(output_path, sizeof(output_path), "tables_using_aclitem.txt");
-
-   if (check_for_data_type_usage(cluster, "pg_catalog.aclitem", output_path))
-   {
-       pg_log(PG_REPORT, "fatal");
-       pg_fatal("Your installation contains the \"aclitem\" data type in user tables.\n"
-                "The internal format of \"aclitem\" changed in PostgreSQL version 16\n"
-                "so this cluster cannot currently be upgraded.  You can drop the\n"
-                "problem columns and restart the upgrade.  A list of the problem\n"
-                "columns is in the file:\n"
-                "    %s", output_path);
-   }
-   else
-       check_ok();
-}
-
-/*
- * check_for_removed_data_type_usage
- *
- * Check for in-core data types that have been removed.  Callers know
- * the exact list.
- */
-static void
-check_for_removed_data_type_usage(ClusterInfo *cluster, const char *version,
-                                 const char *datatype)
-{
-   char        output_path[MAXPGPATH];
-   char        typename[NAMEDATALEN];
-
-   prep_status("Checking for removed \"%s\" data type in user tables",
-               datatype);
-
-   snprintf(output_path, sizeof(output_path), "tables_using_%s.txt",
-            datatype);
-   snprintf(typename, sizeof(typename), "pg_catalog.%s", datatype);
-
-   if (check_for_data_type_usage(cluster, typename, output_path))
-   {
-       pg_log(PG_REPORT, "fatal");
-       pg_fatal("Your installation contains the \"%s\" data type in user tables.\n"
-                "The \"%s\" type has been removed in PostgreSQL version %s,\n"
-                "so this cluster cannot currently be upgraded.  You can drop the\n"
-                "problem columns, or change them to another data type, and restart\n"
-                "the upgrade.  A list of the problem columns is in the file:\n"
-                "    %s", datatype, datatype, version, output_path);
-   }
-   else
-       check_ok();
-}
-
-
-/*
- * check_for_jsonb_9_4_usage()
- *
- * JSONB changed its storage format during 9.4 beta, so check for it.
- */
-static void
-check_for_jsonb_9_4_usage(ClusterInfo *cluster)
-{
-   char        output_path[MAXPGPATH];
-
-   prep_status("Checking for incompatible \"jsonb\" data type");
-
-   snprintf(output_path, sizeof(output_path), "%s/%s",
-            log_opts.basedir,
-            "tables_using_jsonb.txt");
-
-   if (check_for_data_type_usage(cluster, "pg_catalog.jsonb", output_path))
-   {
-       pg_log(PG_REPORT, "fatal");
-       pg_fatal("Your installation contains the \"jsonb\" data type in user tables.\n"
-                "The internal format of \"jsonb\" changed during 9.4 beta so this\n"
-                "cluster cannot currently be upgraded.  You can\n"
-                "drop the problem columns and restart the upgrade.\n"
-                "A list of the problem columns is in the file:\n"
-                "    %s", output_path);
-   }
-   else
-       check_ok();
-}
-
 /*
  * check_for_pg_role_prefix()
  *
index c0bfb002d2d3a61c227b7d5e9e764963d7cee65e..92bcb693fbf22e9028b50e31d71156384a2a9a25 100644 (file)
@@ -352,6 +352,9 @@ typedef struct
 } OSInfo;
 
 
+/* Function signature for data type check version hook */
+typedef bool (*DataTypesUsageVersionCheck) (ClusterInfo *cluster);
+
 /*
  * Global variables
  */
@@ -479,18 +482,10 @@ unsigned int str2uint(const char *str);
 
 /* version.c */
 
-bool       check_for_data_types_usage(ClusterInfo *cluster,
-                                      const char *base_query,
-                                      const char *output_path);
-bool       check_for_data_type_usage(ClusterInfo *cluster,
-                                     const char *type_name,
-                                     const char *output_path);
-void       old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster);
-void       old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster);
+bool       jsonb_9_4_check_applicable(ClusterInfo *cluster);
 void       old_9_6_invalidate_hash_indexes(ClusterInfo *cluster,
                                            bool check_mode);
 
-void       old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster);
 void       report_extension_updates(ClusterInfo *cluster);
 
 /* parallel.c */
index 9dc1399f36888cb82229d9248599d687bbf86d43..2de6dffccdab4d7ab039b7e3da46c3604e4c6615 100644 (file)
 
 #include "postgres_fe.h"
 
-#include "catalog/pg_class_d.h"
 #include "fe_utils/string_utils.h"
 #include "pg_upgrade.h"
 
-
 /*
- * check_for_data_types_usage()
- * Detect whether there are any stored columns depending on given type(s)
- *
- * If so, write a report to the given file name, and return true.
- *
- * base_query should be a SELECT yielding a single column named "oid",
- * containing the pg_type OIDs of one or more types that are known to have
- * inconsistent on-disk representations across server versions.
- *
- * We check for the type(s) in tables, matviews, and indexes, but not views;
- * there's no storage involved in a view.
+ * version_hook functions for check_for_data_types_usage in order to determine
+ * whether a data type check should be executed for the cluster in question or
+ * not.
  */
 bool
-check_for_data_types_usage(ClusterInfo *cluster,
-                          const char *base_query,
-                          const char *output_path)
+jsonb_9_4_check_applicable(ClusterInfo *cluster)
 {
-   bool        found = false;
-   FILE       *script = NULL;
-   int         dbnum;
+   /* JSONB changed its storage format during 9.4 beta */
+   if (GET_MAJOR_VERSION(cluster->major_version) == 904 &&
+       cluster->controldata.cat_ver < JSONB_FORMAT_CHANGE_CAT_VER)
+       return true;
 
-   for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
-   {
-       DbInfo     *active_db = &cluster->dbarr.dbs[dbnum];
-       PGconn     *conn = connectToServer(cluster, active_db->db_name);
-       PQExpBufferData querybuf;
-       PGresult   *res;
-       bool        db_used = false;
-       int         ntups;
-       int         rowno;
-       int         i_nspname,
-                   i_relname,
-                   i_attname;
-
-       /*
-        * The type(s) of interest might be wrapped in a domain, array,
-        * composite, or range, and these container types can be nested (to
-        * varying extents depending on server version, but that's not of
-        * concern here).  To handle all these cases we need a recursive CTE.
-        */
-       initPQExpBuffer(&querybuf);
-       appendPQExpBuffer(&querybuf,
-                         "WITH RECURSIVE oids AS ( "
-       /* start with the type(s) returned by base_query */
-                         " %s "
-                         " UNION ALL "
-                         " SELECT * FROM ( "
-       /* inner WITH because we can only reference the CTE once */
-                         "     WITH x AS (SELECT oid FROM oids) "
-       /* domains on any type selected so far */
-                         "         SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' "
-                         "         UNION ALL "
-       /* arrays over any type selected so far */
-                         "         SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' "
-                         "         UNION ALL "
-       /* composite types containing any type selected so far */
-                         "         SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x "
-                         "         WHERE t.typtype = 'c' AND "
-                         "               t.oid = c.reltype AND "
-                         "               c.oid = a.attrelid AND "
-                         "               NOT a.attisdropped AND "
-                         "               a.atttypid = x.oid "
-                         "         UNION ALL "
-       /* ranges containing any type selected so far */
-                         "         SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x "
-                         "         WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid"
-                         " ) foo "
-                         ") "
-       /* now look for stored columns of any such type */
-                         "SELECT n.nspname, c.relname, a.attname "
-                         "FROM pg_catalog.pg_class c, "
-                         "     pg_catalog.pg_namespace n, "
-                         "     pg_catalog.pg_attribute a "
-                         "WHERE    c.oid = a.attrelid AND "
-                         "     NOT a.attisdropped AND "
-                         "     a.atttypid IN (SELECT oid FROM oids) AND "
-                         "     c.relkind IN ("
-                         CppAsString2(RELKIND_RELATION) ", "
-                         CppAsString2(RELKIND_MATVIEW) ", "
-                         CppAsString2(RELKIND_INDEX) ") AND "
-                         "     c.relnamespace = n.oid AND "
-       /* exclude possible orphaned temp tables */
-                         "     n.nspname !~ '^pg_temp_' AND "
-                         "     n.nspname !~ '^pg_toast_temp_' AND "
-       /* exclude system catalogs, too */
-                         "     n.nspname NOT IN ('pg_catalog', 'information_schema')",
-                         base_query);
-
-       res = executeQueryOrDie(conn, "%s", querybuf.data);
-
-       ntups = PQntuples(res);
-       i_nspname = PQfnumber(res, "nspname");
-       i_relname = PQfnumber(res, "relname");
-       i_attname = PQfnumber(res, "attname");
-       for (rowno = 0; rowno < ntups; rowno++)
-       {
-           found = true;
-           if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
-               pg_fatal("could not open file \"%s\": %m", output_path);
-           if (!db_used)
-           {
-               fprintf(script, "In database: %s\n", active_db->db_name);
-               db_used = true;
-           }
-           fprintf(script, "  %s.%s.%s\n",
-                   PQgetvalue(res, rowno, i_nspname),
-                   PQgetvalue(res, rowno, i_relname),
-                   PQgetvalue(res, rowno, i_attname));
-       }
-
-       PQclear(res);
-
-       termPQExpBuffer(&querybuf);
-
-       PQfinish(conn);
-   }
-
-   if (script)
-       fclose(script);
-
-   return found;
-}
-
-/*
- * check_for_data_type_usage()
- * Detect whether there are any stored columns depending on the given type
- *
- * If so, write a report to the given file name, and return true.
- *
- * type_name should be a fully qualified type name.  This is just a
- * trivial wrapper around check_for_data_types_usage() to convert a
- * type name into a base query.
- */
-bool
-check_for_data_type_usage(ClusterInfo *cluster,
-                         const char *type_name,
-                         const char *output_path)
-{
-   bool        found;
-   char       *base_query;
-
-   base_query = psprintf("SELECT '%s'::pg_catalog.regtype AS oid",
-                         type_name);
-
-   found = check_for_data_types_usage(cluster, base_query, output_path);
-
-   free(base_query);
-
-   return found;
-}
-
-
-/*
- * old_9_3_check_for_line_data_type_usage()
- * 9.3 -> 9.4
- * Fully implement the 'line' data type in 9.4, which previously returned
- * "not enabled" by default and was only functionally enabled with a
- * compile-time switch; as of 9.4 "line" has a different on-disk
- * representation format.
- */
-void
-old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster)
-{
-   char        output_path[MAXPGPATH];
-
-   prep_status("Checking for incompatible \"line\" data type");
-
-   snprintf(output_path, sizeof(output_path), "%s/%s",
-            log_opts.basedir,
-            "tables_using_line.txt");
-
-   if (check_for_data_type_usage(cluster, "pg_catalog.line", output_path))
-   {
-       pg_log(PG_REPORT, "fatal");
-       pg_fatal("Your installation contains the \"line\" data type in user tables.\n"
-                "This data type changed its internal and input/output format\n"
-                "between your old and new versions so this\n"
-                "cluster cannot currently be upgraded.  You can\n"
-                "drop the problem columns and restart the upgrade.\n"
-                "A list of the problem columns is in the file:\n"
-                "    %s", output_path);
-   }
-   else
-       check_ok();
-}
-
-
-/*
- * old_9_6_check_for_unknown_data_type_usage()
- * 9.6 -> 10
- * It's no longer allowed to create tables or views with "unknown"-type
- * columns.  We do not complain about views with such columns, because
- * they should get silently converted to "text" columns during the DDL
- * dump and reload; it seems unlikely to be worth making users do that
- * by hand.  However, if there's a table with such a column, the DDL
- * reload will fail, so we should pre-detect that rather than failing
- * mid-upgrade.  Worse, if there's a matview with such a column, the
- * DDL reload will silently change it to "text" which won't match the
- * on-disk storage (which is like "cstring").  So we *must* reject that.
- */
-void
-old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster)
-{
-   char        output_path[MAXPGPATH];
-
-   prep_status("Checking for invalid \"unknown\" user columns");
-
-   snprintf(output_path, sizeof(output_path), "%s/%s",
-            log_opts.basedir,
-            "tables_using_unknown.txt");
-
-   if (check_for_data_type_usage(cluster, "pg_catalog.unknown", output_path))
-   {
-       pg_log(PG_REPORT, "fatal");
-       pg_fatal("Your installation contains the \"unknown\" data type in user tables.\n"
-                "This data type is no longer allowed in tables, so this\n"
-                "cluster cannot currently be upgraded.  You can\n"
-                "drop the problem columns and restart the upgrade.\n"
-                "A list of the problem columns is in the file:\n"
-                "    %s", output_path);
-   }
-   else
-       check_ok();
+   return false;
 }
 
 /*
@@ -351,41 +139,6 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
        check_ok();
 }
 
-/*
- * old_11_check_for_sql_identifier_data_type_usage()
- * 11 -> 12
- * In 12, the sql_identifier data type was switched from name to varchar,
- * which does affect the storage (name is by-ref, but not varlena). This
- * means user tables using sql_identifier for columns are broken because
- * the on-disk format is different.
- */
-void
-old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster)
-{
-   char        output_path[MAXPGPATH];
-
-   prep_status("Checking for invalid \"sql_identifier\" user columns");
-
-   snprintf(output_path, sizeof(output_path), "%s/%s",
-            log_opts.basedir,
-            "tables_using_sql_identifier.txt");
-
-   if (check_for_data_type_usage(cluster, "information_schema.sql_identifier",
-                                 output_path))
-   {
-       pg_log(PG_REPORT, "fatal");
-       pg_fatal("Your installation contains the \"sql_identifier\" data type in user tables.\n"
-                "The on-disk format for this data type has changed, so this\n"
-                "cluster cannot currently be upgraded.  You can\n"
-                "drop the problem columns and restart the upgrade.\n"
-                "A list of the problem columns is in the file:\n"
-                "    %s", output_path);
-   }
-   else
-       check_ok();
-}
-
-
 /*
  * report_extension_updates()
  * Report extensions that should be updated.