Implement table partitioning.
author Robert Haas <rhaas@postgresql.org>
Wed, 7 Dec 2016 18:17:43 +0000 (13:17 -0500)
committer Robert Haas <rhaas@postgresql.org>
Wed, 7 Dec 2016 18:17:55 +0000 (13:17 -0500)
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own.  The children are called
partitions and contain all of the actual data.  Each partition has an
implicit partitioning constraint.  Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed.  Partitions
can't have extra columns and may not allow nulls unless the parent
does.  Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.

Currently, tables can be range-partitioned or list-partitioned.  List
partitioning is limited to a single column, but range partitioning can
involve multiple columns.  A partitioning "column" can be an
expression.

Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations.  The tuple routing which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.

Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others.  Minor revisions by me.

85 files changed:
doc/src/sgml/catalogs.sgml
doc/src/sgml/ref/alter_table.sgml
doc/src/sgml/ref/create_foreign_table.sgml
doc/src/sgml/ref/create_table.sgml
src/backend/access/common/reloptions.c
src/backend/catalog/Makefile
src/backend/catalog/aclchk.c
src/backend/catalog/dependency.c
src/backend/catalog/heap.c
src/backend/catalog/index.c
src/backend/catalog/objectaddress.c
src/backend/catalog/partition.c [new file with mode: 0644]
src/backend/catalog/pg_constraint.c
src/backend/commands/analyze.c
src/backend/commands/copy.c
src/backend/commands/createas.c
src/backend/commands/indexcmds.c
src/backend/commands/lockcmds.c
src/backend/commands/policy.c
src/backend/commands/seclabel.c
src/backend/commands/sequence.c
src/backend/commands/tablecmds.c
src/backend/commands/trigger.c
src/backend/commands/typecmds.c
src/backend/commands/vacuum.c
src/backend/commands/view.c
src/backend/executor/execMain.c
src/backend/executor/nodeModifyTable.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/equalfuncs.c
src/backend/nodes/nodeFuncs.c
src/backend/nodes/outfuncs.c
src/backend/nodes/readfuncs.c
src/backend/optimizer/util/plancat.c
src/backend/parser/analyze.c
src/backend/parser/gram.y
src/backend/parser/parse_agg.c
src/backend/parser/parse_expr.c
src/backend/parser/parse_func.c
src/backend/parser/parse_utilcmd.c
src/backend/rewrite/rewriteDefine.c
src/backend/rewrite/rewriteHandler.c
src/backend/rewrite/rowsecurity.c
src/backend/tcop/utility.c
src/backend/utils/adt/ruleutils.c
src/backend/utils/cache/relcache.c
src/backend/utils/cache/syscache.c
src/bin/pg_dump/common.c
src/bin/pg_dump/pg_dump.c
src/bin/pg_dump/pg_dump.h
src/bin/psql/describe.c
src/bin/psql/tab-complete.c
src/include/catalog/catversion.h
src/include/catalog/dependency.h
src/include/catalog/heap.h
src/include/catalog/indexing.h
src/include/catalog/partition.h [new file with mode: 0644]
src/include/catalog/pg_class.h
src/include/catalog/pg_partitioned_table.h [new file with mode: 0644]
src/include/catalog/pg_proc.h
src/include/commands/defrem.h
src/include/commands/tablecmds.h
src/include/executor/executor.h
src/include/nodes/execnodes.h
src/include/nodes/nodes.h
src/include/nodes/parsenodes.h
src/include/parser/kwlist.h
src/include/parser/parse_node.h
src/include/parser/parse_utilcmd.h
src/include/pg_config_manual.h
src/include/utils/builtins.h
src/include/utils/rel.h
src/include/utils/syscache.h
src/test/regress/expected/alter_table.out
src/test/regress/expected/create_table.out
src/test/regress/expected/inherit.out
src/test/regress/expected/insert.out
src/test/regress/expected/sanity_check.out
src/test/regress/expected/update.out
src/test/regress/sql/alter_table.sql
src/test/regress/sql/create_table.sql
src/test/regress/sql/inherit.sql
src/test/regress/sql/insert.sql
src/test/regress/sql/update.sql
src/tools/pgindent/typedefs.list

index c4246dcd866e07be2304c0572b6f461feae20dcf..9d2e89523d65fc3999d81998d713c4052792b4b3 100644 (file)
       <entry>template data for procedural languages</entry>
      </row>
 
+     <row>
+      <entry><link linkend="catalog-pg-partitioned-table"><structname>pg_partitioned_table</structname></link></entry>
+      <entry>information about partition key of tables</entry>
+     </row>
+
      <row>
       <entry><link linkend="catalog-pg-policy"><structname>pg_policy</structname></link></entry>
       <entry>row-security policies</entry>
       <entry><type>char</type></entry>
       <entry></entry>
       <entry>
-       <literal>r</> = ordinary table, <literal>i</> = index,
+       <literal>r</> = ordinary table, <literal>P</> = partitioned table,
+       <literal>i</> = index,
        <literal>S</> = sequence, <literal>v</> = view,
        <literal>m</> = materialized view,
        <literal>c</> = composite type, <literal>t</> = TOAST table,
       </entry>
      </row>
 
+     <row>
+      <entry><structfield>relispartition</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>True if table is a partition</entry>
+     </row>
+
      <row>
       <entry><structfield>relfrozenxid</structfield></entry>
       <entry><type>xid</type></entry>
        Access-method-specific options, as <quote>keyword=value</> strings
       </entry>
      </row>
+
+     <row>
+      <entry><structfield>relpartbound</structfield></entry>
+      <entry><type>pg_node_tree</type></entry>
+      <entry></entry>
+      <entry>
+       If table is a partition (see <structfield>relispartition</structfield>),
+       internal representation of the partition bound
+      </entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
 
  </sect1>
 
+ <sect1 id="catalog-pg-partitioned-table">
+  <title><structname>pg_partitioned_table</structname></title>
+
+  <indexterm zone="catalog-pg-partitioned-table">
+   <primary>pg_partitioned_table</primary>
+  </indexterm>
+
+  <para>
+   The catalog <structname>pg_partitioned_table</structname> stores
+   information about how tables are partitioned.
+  </para>
+
+  <table>
+   <title><structname>pg_partitioned_table</> Columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+
+     <row>
+      <entry><structfield>partrelid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
+      <entry>The OID of the <structname>pg_class</> entry for this partitioned table</entry>
+     </row>
+
+     <row>
+      <entry><structfield>partstrat</structfield></entry>
+      <entry><type>char</type></entry>
+      <entry></entry>
+      <entry>
+       Partitioning strategy; <literal>l</> = list partitioned table,
+       <literal>r</> = range partitioned table
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>partnatts</structfield></entry>
+      <entry><type>int2</type></entry>
+      <entry></entry>
+      <entry>The number of columns in partition key</entry>
+     </row>
+
+     <row>
+      <entry><structfield>partattrs</structfield></entry>
+      <entry><type>int2vector</type></entry>
+      <entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
+      <entry>
+       This is an array of <structfield>partnatts</structfield> values that
+       indicate which table columns are part of the partition key.  For
+       example, a value of <literal>1 3</literal> would mean that the first
+       and the third table columns make up the partition key.  A zero in this
+       array indicates that the corresponding partition key column is an
+       expression, rather than a simple column reference.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>partclass</structfield></entry>
+      <entry><type>oidvector</type></entry>
+      <entry><literal><link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link>.oid</literal></entry>
+      <entry>
+       For each column in the partition key, this contains the OID of the
+       operator class to use.  See
+       <link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link> for details.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>partcollation</structfield></entry>
+      <entry><type>oidvector</type></entry>
+      <entry><literal><link linkend="catalog-pg-collation"><structname>pg_collation</structname></link>.oid</literal></entry>
+      <entry>
+       For each column in the partition key, this contains the OID of the
+       collation to use for partitioning.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>partexprs</structfield></entry>
+      <entry><type>pg_node_tree</type></entry>
+      <entry></entry>
+      <entry>
+       Expression trees (in <function>nodeToString()</function>
+       representation) for partition key columns that are not simple column
+       references.  This is a list with one element for each zero
+       entry in <structfield>partattrs</>.  Null if all partition key columns
+       are simple references.
+      </entry>
+     </row>
+
+    </tbody>
+   </tgroup>
+  </table>
+ </sect1>
+
  <sect1 id="catalog-pg-policy">
   <title><structname>pg_policy</structname></title>
 
index e48ccf21e4fbd35d1ae4ac75af0dcf25741bab3c..a6a43c4b302c58cd39f23ade8d97de3807724b45 100644 (file)
@@ -33,6 +33,10 @@ ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
     SET SCHEMA <replaceable class="PARAMETER">new_schema</replaceable>
 ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable> [ OWNED BY <replaceable class="PARAMETER">role_name</replaceable> [, ... ] ]
     SET TABLESPACE <replaceable class="PARAMETER">new_tablespace</replaceable> [ NOWAIT ]
+ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
+    ATTACH PARTITION <replaceable class="PARAMETER">partition_name</replaceable> FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable>
+ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
+    DETACH PARTITION <replaceable class="PARAMETER">partition_name</replaceable>
 
 <phrase>where <replaceable class="PARAMETER">action</replaceable> is one of:</phrase>
 
@@ -166,6 +170,12 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
       values or to reject null values.  You can only use <literal>SET
       NOT NULL</> when the column contains no null values.
      </para>
+
+     <para>
+      If this table is a partition, one cannot perform <literal>DROP NOT NULL</>
+      on a column if it is marked <literal>NOT NULL</literal> in the parent
+      table.
+     </para>
     </listitem>
    </varlistentry>
 
@@ -704,13 +714,63 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>ATTACH PARTITION</literal> <replaceable class="PARAMETER">partition_name</replaceable> FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable></term>
+    <listitem>
+     <para>
+      This form attaches an existing table (which might itself be partitioned)
+      as a partition of the target table using the same syntax for
+      <replaceable class="PARAMETER">partition_bound_spec</replaceable> as
+      <xref linkend="sql-createtable">.  The partition bound specification
+      must correspond to the partitioning strategy and partition key of the
+      target table.  The table to be attached must have all the same columns
+      as the target table and no more; moreover, the column types must also
+      match.  Also, it must have all the <literal>NOT NULL</literal> and
+      <literal>CHECK</literal> constraints of the target table.  Currently
+      <literal>UNIQUE</literal>, <literal>PRIMARY KEY</literal>, and
+      <literal>FOREIGN KEY</literal> constraints are not considered.
+      If any of the <literal>CHECK</literal> constraints of the table being
+      attached is marked <literal>NO INHERIT</literal>, the command will fail;
+      such a constraint must be recreated without the <literal>NO INHERIT</literal>
+      clause.
+     </para>
+
+     <para>
+      A full table scan is performed on the table being attached to check that
+      no existing row in the table violates the partition constraint.  It is
+      possible to avoid this scan by adding a valid <literal>CHECK</literal>
+      constraint to the table that would allow only the rows satisfying the
+      desired partition constraint before running this command.  It will be
+      determined using such a constraint that the table need not be scanned
+      to validate the partition constraint.  This does not work, however, if
+      any of the partition keys is an expression and the partition does not
+      accept <literal>NULL</literal> values.  If attaching a list partition
+      that will not accept <literal>NULL</literal> values, also add a
+      <literal>NOT NULL</literal> constraint to the partition key column,
+      unless it's an expression.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>DETACH PARTITION</literal> <replaceable class="PARAMETER">partition_name</replaceable></term>
+    <listitem>
+     <para>
+      This form detaches the specified partition of the target table.  The detached
+      partition continues to exist as a standalone table, but no longer has any
+      ties to the table from which it was detached.
+     </para>
+    </listitem>
+   </varlistentry>
+
   </variablelist>
   </para>
 
   <para>
    All the actions except <literal>RENAME</literal>,
-   <literal>SET TABLESPACE</literal> and <literal>SET SCHEMA</literal>
-   can be combined into
+   <literal>SET TABLESPACE</literal>, <literal>SET SCHEMA</literal>,
+   <literal>ATTACH PARTITION</literal>, and
+   <literal>DETACH PARTITION</literal> can be combined into
    a list of multiple alterations to apply in parallel.  For example, it
    is possible to add several columns and/or alter the type of several
    columns in a single command.  This is particularly useful with large
@@ -721,8 +781,9 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
    You must own the table to use <command>ALTER TABLE</>.
    To change the schema or tablespace of a table, you must also have
    <literal>CREATE</literal> privilege on the new schema or tablespace.
-   To add the table as a new child of a parent table, you must own the
-   parent table as well.
+   To add the table as a new child of a parent table, you must own the parent
+   table as well.  Also, to attach a table as a new partition of the table,
+   you must own the table being attached.
    To alter the owner, you must also be a direct or indirect member of the new
    owning role, and that role must have <literal>CREATE</literal> privilege on
    the table's schema.  (These restrictions enforce that altering the owner
@@ -938,6 +999,25 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><replaceable class="PARAMETER">partition_name</replaceable></term>
+      <listitem>
+       <para>
+        The name of the table to attach as a new partition or to detach from this table.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><replaceable class="PARAMETER">partition_bound_spec</replaceable></term>
+      <listitem>
+       <para>
+        The partition bound specification for a new partition.  Refer to
+        <xref linkend="sql-createtable"> for more details on the syntax of the same.
+       </para>
+      </listitem>
+     </varlistentry>
+
     </variablelist>
  </refsect1>
 
@@ -977,6 +1057,11 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
     but does not require a table rewrite.
    </para>
 
+   <para>
+    Similarly, when attaching a new partition it may be scanned to verify that
+    existing rows meet the partition constraint.
+   </para>
+
    <para>
     The main reason for providing the option to specify multiple changes
     in a single <command>ALTER TABLE</> is that multiple table scans or
@@ -1047,6 +1132,9 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
     COLUMN</literal> (i.e., <command>ALTER TABLE ONLY ... DROP
     COLUMN</command>) never removes any descendant columns, but
     instead marks them as independently defined rather than inherited.
+    A nonrecursive <literal>DROP COLUMN</literal> command will fail for a
+    partitioned table, because all partitions of a table must have the same
+    columns as the partitioning root.
    </para>
 
    <para>
@@ -1233,6 +1321,27 @@ ALTER TABLE distributors DROP CONSTRAINT distributors_pkey,
     ADD CONSTRAINT distributors_pkey PRIMARY KEY USING INDEX dist_id_temp_idx;
 </programlisting></para>
 
+  <para>
+   Attach a partition to a range partitioned table:
+<programlisting>
+ALTER TABLE measurement
+    ATTACH PARTITION measurement_y2016m07 FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
+</programlisting></para>
+
+  <para>
+   Attach a partition to a list partitioned table:
+<programlisting>
+ALTER TABLE cities
+    ATTACH PARTITION cities_west FOR VALUES IN ('Los Angeles', 'San Francisco');
+</programlisting></para>
+
+  <para>
+   Detach a partition from a partitioned table:
+<programlisting>
+ALTER TABLE measurement
+    DETACH PARTITION measurement_y2015m12;
+</programlisting></para>
+
  </refsect1>
 
  <refsect1>
index 413b033cb57fe0b149bc6768fad4e7810ae35f45..5d0dcf567b5d12dabb823aadb53b0da54d0fe9bc 100644 (file)
@@ -27,6 +27,15 @@ CREATE FOREIGN TABLE [ IF NOT EXISTS ] <replaceable class="PARAMETER">table_name
   SERVER <replaceable class="parameter">server_name</replaceable>
 [ OPTIONS ( <replaceable class="PARAMETER">option</replaceable> '<replaceable class="PARAMETER">value</replaceable>' [, ... ] ) ]
 
+CREATE FOREIGN TABLE [ IF NOT EXISTS ] <replaceable class="PARAMETER">table_name</replaceable>
+  PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable> [ (
+  { <replaceable class="PARAMETER">column_name</replaceable> WITH OPTIONS [ <replaceable class="PARAMETER">column_constraint</replaceable> [ ... ] ]
+    | <replaceable>table_constraint</replaceable> }
+    [, ... ]
+) ] FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable>
+  SERVER <replaceable class="parameter">server_name</replaceable>
+[ OPTIONS ( <replaceable class="PARAMETER">option</replaceable> '<replaceable class="PARAMETER">value</replaceable>' [, ... ] ) ]
+
 <phrase>where <replaceable class="PARAMETER">column_constraint</replaceable> is:</phrase>
 
 [ CONSTRAINT <replaceable class="PARAMETER">constraint_name</replaceable> ]
@@ -67,6 +76,12 @@ CHECK ( <replaceable class="PARAMETER">expression</replaceable> ) [ NO INHERIT ]
    name as any existing data type in the same schema.
   </para>
 
+  <para>
+   If <literal>PARTITION OF</literal> clause is specified then the table is
+   created as a partition of <literal>parent_table</literal> with specified
+   bounds.
+  </para>
+
   <para>
    To be able to create a foreign table, you must have <literal>USAGE</literal>
    privilege on the foreign server, as well as <literal>USAGE</literal>
@@ -314,6 +329,17 @@ CREATE FOREIGN TABLE films (
 SERVER film_server;
 </programlisting></para>
 
+  <para>
+   Create foreign table <structname>measurement_y2016m07</>, which will be
+   accessed through the server <structname>server_07</>, as a partition
+   of the range partitioned table <structname>measurement</>:
+
+<programlisting>
+CREATE FOREIGN TABLE measurement_y2016m07
+    PARTITION OF measurement FOR VALUES FROM ('2016-07-01') TO ('2016-08-01')
+    SERVER server_07;
+</programlisting></para>
+
  </refsect1>
 
  <refsect1 id="SQL-CREATEFOREIGNTABLE-compatibility">
index bf2ad64d66e3a40011a06a9904bce0a70aae09d6..8bf8af302b595ab111492b29678d5cdd75300cd1 100644 (file)
@@ -28,6 +28,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
     [, ... ]
 ] )
 [ INHERITS ( <replaceable>parent_table</replaceable> [, ... ] ) ]
+[ PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ... ] ) ]
 [ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
 [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
 [ TABLESPACE <replaceable class="PARAMETER">tablespace_name</replaceable> ]
@@ -38,6 +39,18 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
     | <replaceable>table_constraint</replaceable> }
     [, ... ]
 ) ]
+[ PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ... ] ) ]
+[ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
+[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
+[ TABLESPACE <replaceable class="PARAMETER">tablespace_name</replaceable> ]
+
+CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] <replaceable class="PARAMETER">table_name</replaceable>
+    PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable> [ (
+  { <replaceable class="PARAMETER">column_name</replaceable> [ <replaceable class="PARAMETER">column_constraint</replaceable> [ ... ] ]
+    | <replaceable>table_constraint</replaceable> }
+    [, ... ]
+) ] FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable>
+[ PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ... ] ) ]
 [ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
 [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
 [ TABLESPACE <replaceable class="PARAMETER">tablespace_name</replaceable> ]
@@ -70,6 +83,11 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
 
 { INCLUDING | EXCLUDING } { DEFAULTS | CONSTRAINTS | INDEXES | STORAGE | COMMENTS | ALL }
 
+<phrase>and <replaceable class="PARAMETER">partition_bound_spec</replaceable> is:</phrase>
+
+{ IN ( <replaceable class="PARAMETER">expression</replaceable> [, ...] ) |
+  FROM ( { <replaceable class="PARAMETER">expression</replaceable> | UNBOUNDED } [, ...] ) TO ( { <replaceable class="PARAMETER">expression</replaceable> | UNBOUNDED } [, ...] ) }
+
 <phrase><replaceable class="PARAMETER">index_parameters</replaceable> in <literal>UNIQUE</literal>, <literal>PRIMARY KEY</literal>, and <literal>EXCLUDE</literal> constraints are:</phrase>
 
 [ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) ]
@@ -229,6 +247,51 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable></literal></term>
+    <listitem>
+     <para>
+      Creates the table as a <firstterm>partition</firstterm> of the specified
+      parent table.
+     </para>
+
+     <para>
+      The partition bound specification must correspond to the partitioning
+      method and partition key of the parent table, and must not overlap with
+      any existing partition of that parent.
+     </para>
+
+     <para>
+      A partition cannot have columns other than those inherited from the
+      parent.  That includes the <structfield>oid</> column, which can be
+      specified using the <literal>WITH (OIDS)</literal> clause.
+      Defaults and constraints can optionally be specified for each of the
+      inherited columns.  One can also specify table constraints in addition
+      to those inherited from the parent.  If a check constraint with the name
+      matching one of the parent's constraints is specified, it is merged with
+      the latter, provided the specified condition is the same.
+     </para>
+
+     <para>
+      Rows inserted into a partitioned table will be automatically routed to
+      the correct partition.  If no suitable partition exists, an error will
+      occur.
+     </para>
+
+     <para>
+      A partition must have the same column names and types as the table of
+      which it is a partition.  Therefore, modifications to the column names
+      or types of the partitioned table will automatically propagate to all
+      children, as will operations such as TRUNCATE which normally affect a
+      table and all of its inheritance children.  It is also possible to
+      TRUNCATE a partition individually, just as for an inheritance child.
+      Note that dropping a partition with <literal>DROP TABLE</literal>
+      requires taking an <literal>ACCESS EXCLUSIVE</literal> lock on the
+      parent table.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><replaceable class="PARAMETER">column_name</replaceable></term>
     <listitem>
@@ -313,6 +376,46 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ <replaceable class="parameter">opclass</replaceable> ] [, ...] ) </literal></term>
+    <listitem>
+     <para>
+      The optional <literal>PARTITION BY</literal> clause specifies a strategy
+      of partitioning the table.  The table thus created is called a
+      <firstterm>partitioned</firstterm> table.  The parenthesized list of
+      columns or expressions forms the <firstterm>partition key</firstterm>
+      for the table.  When using range partitioning, the partition key can
+      include multiple columns or expressions, but for list partitioning, the
+      partition key must consist of a single column or expression.  If no
+      btree operator class is specified when creating a partitioned table,
+      the default btree operator class for the datatype will be used.  If
+      there is none, an error will be reported.
+     </para>
+
+     <para>
+      A partitioned table is divided into sub-tables (called partitions),
+      which are created using separate <literal>CREATE TABLE</> commands.
+      The partitioned table is itself empty.  A data row inserted into the
+      table is routed to a partition based on the value of columns or
+      expressions in the partition key.  If no existing partition matches
+      the values in the new row, an error will be reported.
+     </para>
+
+     <para>
+      Partitioned tables do not support <literal>UNIQUE</literal>,
+      <literal>PRIMARY KEY</literal>, <literal>EXCLUDE</literal>, or
+      <literal>FOREIGN KEY</literal> constraints; however, you can define
+      these constraints on individual partitions.
+     </para>
+
+     <para>
+      When using range partitioning, a <literal>NOT NULL</literal> constraint
+      is added to each non-expression column in the partition key.
+     </para>
+
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>LIKE <replaceable>source_table</replaceable> [ <replaceable>like_option</replaceable> ... ]</literal></term>
     <listitem>
@@ -1368,6 +1471,57 @@ CREATE TABLE employees OF employee_type (
     PRIMARY KEY (name),
     salary WITH OPTIONS DEFAULT 1000
 );
+</programlisting></para>
+
+  <para>
+   Create a range partitioned table:
+<programlisting>
+CREATE TABLE measurement (
+    city_id         int not null,
+    logdate         date not null,
+    peaktemp        int,
+    unitsales       int
+) PARTITION BY RANGE (logdate);
+</programlisting></para>
+
+  <para>
+   Create a list partitioned table:
+<programlisting>
+CREATE TABLE cities (
+    city_id      int not null,
+    name         text not null,
+    population   int
+) PARTITION BY LIST (initcap(name));
+</programlisting></para>
+
+  <para>
+   Create partition of a range partitioned table:
+<programlisting>
+CREATE TABLE measurement_y2016m07
+    PARTITION OF measurement (
+    unitsales WITH OPTIONS DEFAULT 0
+) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
+</programlisting></para>
+
+  <para>
+   Create partition of a list partitioned table:
+<programlisting>
+CREATE TABLE cities_west
+    PARTITION OF cities (
+    CONSTRAINT city_id_nonzero CHECK (city_id != 0)
+) FOR VALUES IN ('Los Angeles', 'San Francisco');
+</programlisting></para>
+
+  <para>
+   Create partition of a list partitioned table that is itself further
+   partitioned and then add a partition to it:
+<programlisting>
+CREATE TABLE cities_west
+    PARTITION OF cities (
+    CONSTRAINT city_id_nonzero CHECK (city_id != 0)
+) FOR VALUES IN ('Los Angeles', 'San Francisco') PARTITION BY RANGE (population);
+
+CREATE TABLE cities_west_10000_to_100000
+    PARTITION OF cities_west FOR VALUES FROM (10000) TO (100000);
 </programlisting></para>
  </refsect1>
 
index 83a97b06ab85bdfaa04c82bf2479948a2cc68849..34018cac7c89a729877f3f286d7cfc8d392a9bd3 100644 (file)
@@ -930,6 +930,7 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
                case RELKIND_RELATION:
                case RELKIND_TOASTVALUE:
                case RELKIND_MATVIEW:
+               case RELKIND_PARTITIONED_TABLE:
                        options = heap_reloptions(classForm->relkind, datum, false);
                        break;
                case RELKIND_VIEW:
@@ -1381,6 +1382,7 @@ heap_reloptions(char relkind, Datum reloptions, bool validate)
                        return (bytea *) rdopts;
                case RELKIND_RELATION:
                case RELKIND_MATVIEW:
+               case RELKIND_PARTITIONED_TABLE:
                        return default_reloptions(reloptions, validate, RELOPT_KIND_HEAP);
                default:
                        /* other relkinds are not supported */
index 1ce761004979d2c5eb56021ed200fae5a863a22d..2d5ac09bece554bdd9dee157e6dc1111dc2c67df 100644 (file)
@@ -11,7 +11,7 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \
-       objectaccess.o objectaddress.o pg_aggregate.o pg_collation.o \
+       objectaccess.o objectaddress.o partition.o pg_aggregate.o pg_collation.o \
        pg_constraint.o pg_conversion.o \
        pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \
        pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \
@@ -41,7 +41,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
        pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
        pg_foreign_table.h pg_policy.h pg_replication_origin.h \
        pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \
-       pg_collation.h pg_range.h pg_transform.h \
+       pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \
        toasting.h indexing.h \
     )
 
index c0df6710d1d110efbf767c82e12f15c793dadff3..3086021432ab3abed6660669ceff3415740319fa 100644 (file)
@@ -768,6 +768,8 @@ objectsInSchemaToOids(GrantObjectType objtype, List *nspnames)
                                objects = list_concat(objects, objs);
                                objs = getRelationsInNamespace(namespaceId, RELKIND_FOREIGN_TABLE);
                                objects = list_concat(objects, objs);
+                               objs = getRelationsInNamespace(namespaceId, RELKIND_PARTITIONED_TABLE);
+                               objects = list_concat(objects, objs);
                                break;
                        case ACL_OBJECT_SEQUENCE:
                                objs = getRelationsInNamespace(namespaceId, RELKIND_SEQUENCE);
index b697e88ef0910a4d6c828f0a2981e83562178812..0cdd1c5c6cb05a6122cdd49f5205b968e6dcdba7 100644 (file)
@@ -1352,7 +1352,8 @@ void
 recordDependencyOnSingleRelExpr(const ObjectAddress *depender,
                                                                Node *expr, Oid relId,
                                                                DependencyType behavior,
-                                                               DependencyType self_behavior)
+                                                               DependencyType self_behavior,
+                                                               bool ignore_self)
 {
        find_expr_references_context context;
        RangeTblEntry rte;
@@ -1407,9 +1408,10 @@ recordDependencyOnSingleRelExpr(const ObjectAddress *depender,
                context.addrs->numrefs = outrefs;
 
                /* Record the self-dependencies */
-               recordMultipleDependencies(depender,
-                                                                  self_addrs->refs, self_addrs->numrefs,
-                                                                  self_behavior);
+               if (!ignore_self)
+                       recordMultipleDependencies(depender,
+                                                                          self_addrs->refs, self_addrs->numrefs,
+                                                                          self_behavior);
 
                free_object_addresses(self_addrs);
        }
index 0b804e7ac6094e68570b081146379fe92f679c55..7f5bad0b5da322536d57a3047bcc4aa8c92630df 100644 (file)
@@ -41,6 +41,7 @@
 #include "catalog/heap.h"
 #include "catalog/index.h"
 #include "catalog/objectaccess.h"
+#include "catalog/partition.h"
 #include "catalog/pg_attrdef.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_constraint.h"
@@ -48,6 +49,8 @@
 #include "catalog/pg_foreign_table.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_partitioned_table.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_type.h"
@@ -808,6 +811,7 @@ InsertPgClassTuple(Relation pg_class_desc,
        values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass);
        values[Anum_pg_class_relispopulated - 1] = BoolGetDatum(rd_rel->relispopulated);
        values[Anum_pg_class_relreplident - 1] = CharGetDatum(rd_rel->relreplident);
+       values[Anum_pg_class_relispartition - 1] = BoolGetDatum(rd_rel->relispartition);
        values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid);
        values[Anum_pg_class_relminmxid - 1] = MultiXactIdGetDatum(rd_rel->relminmxid);
        if (relacl != (Datum) 0)
@@ -819,6 +823,9 @@ InsertPgClassTuple(Relation pg_class_desc,
        else
                nulls[Anum_pg_class_reloptions - 1] = true;
 
+       /* relpartbound is set by updating this tuple, if necessary */
+       nulls[Anum_pg_class_relpartbound - 1] = true;
+
        tup = heap_form_tuple(RelationGetDescr(pg_class_desc), values, nulls);
 
        /*
@@ -924,6 +931,9 @@ AddNewRelationTuple(Relation pg_class_desc,
        new_rel_reltup->reltype = new_type_oid;
        new_rel_reltup->reloftype = reloftype;
 
+       /* relispartition is always set by updating this tuple later */
+       new_rel_reltup->relispartition = false;
+
        new_rel_desc->rd_att->tdtypeid = new_type_oid;
 
        /* Now build and insert the tuple */
@@ -1104,7 +1114,8 @@ heap_create_with_catalog(const char *relname,
                if (IsBinaryUpgrade &&
                        (relkind == RELKIND_RELATION || relkind == RELKIND_SEQUENCE ||
                         relkind == RELKIND_VIEW || relkind == RELKIND_MATVIEW ||
-                        relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE))
+                        relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE ||
+                        relkind == RELKIND_PARTITIONED_TABLE))
                {
                        if (!OidIsValid(binary_upgrade_next_heap_pg_class_oid))
                                ereport(ERROR,
@@ -1138,6 +1149,7 @@ heap_create_with_catalog(const char *relname,
                        case RELKIND_VIEW:
                        case RELKIND_MATVIEW:
                        case RELKIND_FOREIGN_TABLE:
+                       case RELKIND_PARTITIONED_TABLE:
                                relacl = get_user_default_acl(ACL_OBJECT_RELATION, ownerid,
                                                                                          relnamespace);
                                break;
@@ -1182,7 +1194,8 @@ heap_create_with_catalog(const char *relname,
                                                          relkind == RELKIND_VIEW ||
                                                          relkind == RELKIND_MATVIEW ||
                                                          relkind == RELKIND_FOREIGN_TABLE ||
-                                                         relkind == RELKIND_COMPOSITE_TYPE))
+                                                         relkind == RELKIND_COMPOSITE_TYPE ||
+                                                         relkind == RELKIND_PARTITIONED_TABLE))
                new_array_oid = AssignTypeArrayOid();
 
        /*
@@ -1349,7 +1362,9 @@ heap_create_with_catalog(const char *relname,
        if (relpersistence == RELPERSISTENCE_UNLOGGED)
        {
                Assert(relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW ||
-                          relkind == RELKIND_TOASTVALUE);
+                          relkind == RELKIND_TOASTVALUE ||
+                          relkind == RELKIND_PARTITIONED_TABLE);
+
                heap_create_init_fork(new_rel_desc);
        }
 
@@ -1754,12 +1769,29 @@ void
 heap_drop_with_catalog(Oid relid)
 {
        Relation        rel;
+       Oid                     parentOid;
+       Relation        parent = NULL;
 
        /*
         * Open and lock the relation.
         */
        rel = relation_open(relid, AccessExclusiveLock);
 
+       /*
+        * If the relation is a partition, we must grab exclusive lock on its
+        * parent because we need to update its partition descriptor. We must
+        * take a table lock strong enough to prevent all queries on the parent
+        * from proceeding until we commit and send out a shared-cache-inval
+        * notice that will make them update their partition descriptor.
+        * Sometimes, doing this is cycles spent uselessly, especially if the
+        * parent will be dropped as part of the same command anyway.
+        */
+       if (rel->rd_rel->relispartition)
+       {
+               parentOid = get_partition_parent(relid);
+               parent = heap_open(parentOid, AccessExclusiveLock);
+       }
+
        /*
         * There can no longer be anyone *else* touching the relation, but we
         * might still have open queries or cursors, or pending trigger events, in
@@ -1795,6 +1827,12 @@ heap_drop_with_catalog(Oid relid)
                heap_close(rel, RowExclusiveLock);
        }
 
+       /*
+        * If a partitioned table, delete the pg_partitioned_table tuple.
+        */
+       if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+               RemovePartitionKeyByRelId(relid);
+
        /*
         * Schedule unlinking of the relation's physical files at commit.
         */
@@ -1845,6 +1883,12 @@ heap_drop_with_catalog(Oid relid)
         * delete relation tuple
         */
        DeleteRelationTuple(relid);
+
+       if (parent)
+       {
+               CacheInvalidateRelcache(parent);
+               heap_close(parent, NoLock);             /* keep the lock */
+       }
 }
 
 
@@ -2027,6 +2071,17 @@ StoreRelCheck(Relation rel, char *ccname, Node *expr,
        else
                attNos = NULL;
 
+       /*
+        * Partitioned tables do not contain any rows themselves, so a NO INHERIT
+        * constraint makes no sense.
+        */
+       if (is_no_inherit &&
+               rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                                errmsg("cannot add NO INHERIT constraint to partitioned table \"%s\"",
+                                                RelationGetRelationName(rel))));
+
        /*
         * Create the Check Constraint
         */
@@ -2440,8 +2495,11 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr,
                         * definition) then interpret addition of a local constraint as a
                         * legal merge.  This allows ALTER ADD CONSTRAINT on parent and
                         * child tables to be given in either order with same end state.
+                        * However if the relation is a partition, all inherited
+                        * constraints are always non-local, including those that were
+                        * merged.
                         */
-                       if (is_local && !con->conislocal)
+                       if (is_local && !con->conislocal && !rel->rd_rel->relispartition)
                                allow_merge = true;
 
                        if (!found || !allow_merge)
@@ -2486,10 +2544,24 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr,
                        tup = heap_copytuple(tup);
                        con = (Form_pg_constraint) GETSTRUCT(tup);
 
-                       if (is_local)
-                               con->conislocal = true;
+                       /*
+                        * In case of partitions, an inherited constraint must be
+                        * inherited only once since it cannot have multiple parents and
+                        * it is never considered local.
+                        */
+                       if (rel->rd_rel->relispartition)
+                       {
+                               con->coninhcount = 1;
+                               con->conislocal = false;
+                       }
                        else
-                               con->coninhcount++;
+                       {
+                               if (is_local)
+                                       con->conislocal = true;
+                               else
+                                       con->coninhcount++;
+                       }
+
                        if (is_no_inherit)
                        {
                                Assert(is_local);
@@ -3013,3 +3085,187 @@ insert_ordered_unique_oid(List *list, Oid datum)
        lappend_cell_oid(list, prev, datum);
        return list;
 }
+
+/*
+ * StorePartitionKey
+ *             Store information about the partition key of rel into the catalog
+ *
+ * Inserts one pg_partitioned_table row for rel, which must be a partitioned
+ * table.  partattrs, partopclass and partcollation are arrays with partnatts
+ * entries each; partexprs is the list of key expressions (possibly empty),
+ * which is stored in its nodeToString() form, or as NULL if the list is empty.
+ *
+ * Dependencies of rel on the key's operator classes and collations, and on
+ * anything referenced by the key expressions, are recorded as well.  Finally,
+ * rel's relcache entry is invalidated.
+ */
+void
+StorePartitionKey(Relation rel,
+                                 char strategy,
+                                 int16 partnatts,
+                                 AttrNumber *partattrs,
+                                 List *partexprs,
+                                 Oid *partopclass,
+                                 Oid *partcollation)
+{
+       int                     i;
+       int2vector *partattrs_vec;
+       oidvector  *partopclass_vec;
+       oidvector  *partcollation_vec;
+       Datum           partexprDatum;
+       Relation        pg_partitioned_table;
+       HeapTuple       tuple;
+       Datum           values[Natts_pg_partitioned_table];
+       bool            nulls[Natts_pg_partitioned_table];
+       ObjectAddress   myself;
+       ObjectAddress   referenced;
+
+       Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+       /* Copy the attribute numbers, opclass and collation OIDs into vectors */
+       partattrs_vec = buildint2vector(partattrs, partnatts);
+       partopclass_vec = buildoidvector(partopclass, partnatts);
+       partcollation_vec = buildoidvector(partcollation, partnatts);
+
+       /* Convert the expressions (if any) to a text datum */
+       if (partexprs)
+       {
+               char       *exprString;
+
+               exprString = nodeToString(partexprs);
+               partexprDatum = CStringGetTextDatum(exprString);
+               pfree(exprString);
+       }
+       else
+               partexprDatum = (Datum) 0;
+
+       pg_partitioned_table = heap_open(PartitionedRelationId, RowExclusiveLock);
+
+       MemSet(nulls, false, sizeof(nulls));
+
+       /* Only partexprs can ever be NULL */
+       if (!partexprDatum)
+               nulls[Anum_pg_partitioned_table_partexprs - 1] = true;
+
+       values[Anum_pg_partitioned_table_partrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel));
+       values[Anum_pg_partitioned_table_partstrat - 1] = CharGetDatum(strategy);
+       values[Anum_pg_partitioned_table_partnatts - 1] = Int16GetDatum(partnatts);
+       values[Anum_pg_partitioned_table_partattrs - 1] = PointerGetDatum(partattrs_vec);
+       values[Anum_pg_partitioned_table_partclass - 1] = PointerGetDatum(partopclass_vec);
+       values[Anum_pg_partitioned_table_partcollation - 1] = PointerGetDatum(partcollation_vec);
+       values[Anum_pg_partitioned_table_partexprs - 1] = partexprDatum;
+
+       tuple = heap_form_tuple(RelationGetDescr(pg_partitioned_table), values, nulls);
+
+       simple_heap_insert(pg_partitioned_table, tuple);
+
+       /* Update the indexes on pg_partitioned_table */
+       CatalogUpdateIndexes(pg_partitioned_table, tuple);
+       heap_close(pg_partitioned_table, RowExclusiveLock);
+
+       /* Mark this relation as dependent on a few things as follows */
+       myself.classId = RelationRelationId;
+       myself.objectId = RelationGetRelid(rel);
+       myself.objectSubId = 0;
+
+       /* Operator class and collation per key column */
+       for (i = 0; i < partnatts; i++)
+       {
+               referenced.classId = OperatorClassRelationId;
+               referenced.objectId = partopclass[i];
+               referenced.objectSubId = 0;
+
+               recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+               /*
+                * Record the collation only if there is one (presumably there is
+                * none for key columns of non-collatable types); a dependency on
+                * InvalidOid would reference a nonexistent object.
+                */
+               if (OidIsValid(partcollation[i]))
+               {
+                       referenced.classId = CollationRelationId;
+                       referenced.objectId = partcollation[i];
+                       referenced.objectSubId = 0;
+
+                       recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+               }
+       }
+
+       /*
+        * Anything mentioned in the expressions.  We must ignore the column
+        * references, which will depend on the table itself; there is no
+        * separate partition key object.
+        */
+       if (partexprs)
+               recordDependencyOnSingleRelExpr(&myself,
+                                                                               (Node *) partexprs,
+                                                                               RelationGetRelid(rel),
+                                                                               DEPENDENCY_NORMAL,
+                                                                               DEPENDENCY_AUTO, true);
+
+       /*
+        * Invalidate the relcache entry so that, after the next
+        * CommandCounterIncrement(), it is rebuilt using the information in the
+        * catalog entry just created.
+        */
+       CacheInvalidateRelcache(rel);
+}
+
+/*
+ *  RemovePartitionKeyByRelId
+ *             Delete the pg_partitioned_table row belonging to relation relid
+ *
+ * Raises an error if the syscache holds no partition key entry for relid.
+ */
+void
+RemovePartitionKeyByRelId(Oid relid)
+{
+       Relation        pg_partitioned_table;
+       HeapTuple       keytup;
+
+       pg_partitioned_table = heap_open(PartitionedRelationId, RowExclusiveLock);
+
+       /* Locate the row through the syscache keyed on the relation's OID */
+       keytup = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid));
+       if (!HeapTupleIsValid(keytup))
+               elog(ERROR, "cache lookup failed for partition key of relation %u",
+                        relid);
+
+       simple_heap_delete(pg_partitioned_table, &keytup->t_self);
+
+       /* Drop the syscache reference before closing the catalog */
+       ReleaseSysCache(keytup);
+       heap_close(pg_partitioned_table, RowExclusiveLock);
+}
+
+/*
+ * StorePartitionBound
+ *             Update pg_class tuple of rel to store the partition bound and set
+ *             relispartition to true
+ *
+ * bound is stored in relpartbound in its nodeToString() form.  The pg_class
+ * row of rel is expected to have relispartition unset and relpartbound NULL
+ * on entry (checked in assert-enabled builds only).
+ */
+void
+StorePartitionBound(Relation rel, Node *bound)
+{
+       Relation        classRel;
+       HeapTuple       tuple,
+                               newtuple;
+       Datum           new_val[Natts_pg_class];
+       bool            new_null[Natts_pg_class],
+                               new_repl[Natts_pg_class];
+
+       /* Update pg_class tuple */
+       classRel = heap_open(RelationRelationId, RowExclusiveLock);
+       tuple = SearchSysCacheCopy1(RELOID,
+                                                               ObjectIdGetDatum(RelationGetRelid(rel)));
+
+       /* Fail cleanly rather than dereference an invalid tuple below */
+       if (!HeapTupleIsValid(tuple))
+               elog(ERROR, "cache lookup failed for relation %u",
+                        RelationGetRelid(rel));
+
+#ifdef USE_ASSERT_CHECKING
+       {
+               Form_pg_class   classForm;
+               bool    isnull;
+
+               classForm = (Form_pg_class) GETSTRUCT(tuple);
+               Assert(!classForm->relispartition);
+               (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound,
+                                                          &isnull);
+               Assert(isnull);
+       }
+#endif
+
+       /* Fill in relpartbound value; only that column is marked for replacement */
+       memset(new_val, 0, sizeof(new_val));
+       memset(new_null, false, sizeof(new_null));
+       memset(new_repl, false, sizeof(new_repl));
+       new_val[Anum_pg_class_relpartbound - 1] = CStringGetTextDatum(nodeToString(bound));
+       new_null[Anum_pg_class_relpartbound - 1] = false;
+       new_repl[Anum_pg_class_relpartbound - 1] = true;
+       newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+                                                                new_val, new_null, new_repl);
+       /* Also set the flag */
+       ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = true;
+       simple_heap_update(classRel, &newtuple->t_self, newtuple);
+       CatalogUpdateIndexes(classRel, newtuple);
+       heap_freetuple(newtuple);
+       heap_close(classRel, RowExclusiveLock);
+}
index 08b646d8f33cbdedb34eebc30ad452731ca6b377..08b0989112bc9902548980b703fa84eff9e16901 100644 (file)
@@ -1043,7 +1043,7 @@ index_create(Relation heapRelation,
                                                                                  (Node *) indexInfo->ii_Expressions,
                                                                                        heapRelationId,
                                                                                        DEPENDENCY_NORMAL,
-                                                                                       DEPENDENCY_AUTO);
+                                                                                       DEPENDENCY_AUTO, false);
                }
 
                /* Store dependencies on anything mentioned in predicate */
@@ -1053,7 +1053,7 @@ index_create(Relation heapRelation,
                                                                                        (Node *) indexInfo->ii_Predicate,
                                                                                        heapRelationId,
                                                                                        DEPENDENCY_NORMAL,
-                                                                                       DEPENDENCY_AUTO);
+                                                                                       DEPENDENCY_AUTO, false);
                }
        }
        else
index d531d17cdbcb0c37c4a8c7a0cd992147b877c4f7..bb4b080b007915a1e4def685dfd54f0983d8c08b 100644 (file)
@@ -1204,7 +1204,8 @@ get_relation_by_qualified_name(ObjectType objtype, List *objname,
                                                                RelationGetRelationName(relation))));
                        break;
                case OBJECT_TABLE:
-                       if (relation->rd_rel->relkind != RELKIND_RELATION)
+                       if (relation->rd_rel->relkind != RELKIND_RELATION &&
+                               relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
                                ereport(ERROR,
                                                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                                 errmsg("\"%s\" is not a table",
@@ -3244,6 +3245,7 @@ getRelationDescription(StringInfo buffer, Oid relid)
        switch (relForm->relkind)
        {
                case RELKIND_RELATION:
+               case RELKIND_PARTITIONED_TABLE:
                        appendStringInfo(buffer, _("table %s"),
                                                         relname);
                        break;
@@ -3701,6 +3703,7 @@ getRelationTypeDescription(StringInfo buffer, Oid relid, int32 objectSubId)
        switch (relForm->relkind)
        {
                case RELKIND_RELATION:
+               case RELKIND_PARTITIONED_TABLE:
                        appendStringInfoString(buffer, "table");
                        break;
                case RELKIND_INDEX:
diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
new file mode 100644 (file)
index 0000000..6dab45f
--- /dev/null
@@ -0,0 +1,1917 @@
+/*-------------------------------------------------------------------------
+ *
+ * partition.c
+ *               Partitioning related data structures and functions.
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *               src/backend/catalog/partition.c
+ *
+ *-------------------------------------------------------------------------
+*/
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/nbtree.h"
+#include "access/sysattr.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaddress.h"
+#include "catalog/partition.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/parsenodes.h"
+#include "optimizer/clauses.h"
+#include "optimizer/planmain.h"
+#include "optimizer/var.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/lmgr.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/memutils.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/ruleutils.h"
+#include "utils/syscache.h"
+
+/*
+ * Information about bounds of a partitioned relation
+ *
+ * A list partition datum that is known to be NULL is never put into the
+ * datums array. Instead, it is tracked using has_null and null_index fields.
+ *
+ * In the case of range partitioning, ndatums will typically be far less than
+ * 2 * nparts, because a partition's upper bound and the next partition's lower
+ * bound are the same in most common cases, and we only store one of them.
+ *
+ * In the case of list partitioning, the indexes array stores one entry for
+ * every datum, which is the index of the partition that accepts a given datum.
+ * In case of range partitioning, it stores one entry per distinct range
+ * datum, which is the index of the partition for which a given datum
+ * is an upper bound.
+ */
+
+/*
+ * Ternary value to represent what's contained in a range bound datum: either
+ * a finite value or one of the two infinities.
+ */
+typedef enum RangeDatumContent
+{
+       RANGE_DATUM_FINITE = 0,         /* actual datum stored elsewhere */
+       RANGE_DATUM_NEG_INF,            /* negative infinity */
+       RANGE_DATUM_POS_INF                     /* positive infinity */
+} RangeDatumContent;
+
+typedef struct PartitionBoundInfoData
+{
+       char            strategy;               /* list or range bounds? */
+       int                     ndatums;                /* Length of the following datums array */
+       Datum     **datums;                     /* Array of datum-tuples with key->partnatts
+                                                                * datums each */
+       RangeDatumContent **content;/* what's contained in each range bound datum?
+                                                                * (see the above enum); NULL for list
+                                                                * partitioned tables */
+       int                *indexes;            /* Partition indexes; one entry per member of
+                                                                * the datums array (plus one if range
+                                                                * partitioned table) */
+       bool            has_null;               /* Is there a null-accepting partition? false
+                                                                * for range partitioned tables */
+       int                     null_index;             /* Index of the null-accepting partition; -1
+                                                                * for range partitioned tables */
+} PartitionBoundInfoData;
+
+/*
+ * When qsort'ing partition bounds after reading from the catalog, each bound
+ * is represented with one of the following structs.
+ */
+
+/*
+ * One value coming from some (index'th) list partition; used as the element
+ * type when sorting list partition values.
+ */
+typedef struct PartitionListValue
+{
+       int                     index;                  /* index of the partition the value comes from */
+       Datum           value;                  /* the list value itself */
+} PartitionListValue;
+
+/*
+ * One bound (either the lower or the upper one) of a range partition; used
+ * as the element type when sorting range bounds.
+ */
+typedef struct PartitionRangeBound
+{
+       int                     index;                  /* presumably the index of the partition this
+                                                                * bound belongs to -- TODO confirm */
+       Datum      *datums;                     /* range bound datums */
+       RangeDatumContent *content; /* what's contained in each datum? */
+       bool            lower;                  /* this is the lower (vs upper) bound */
+} PartitionRangeBound;
+
+static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
+                                                          void *arg);
+static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
+                                                  void *arg);
+
+static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
+static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
+static Oid get_partition_operator(PartitionKey key, int col,
+                                          StrategyNumber strategy, bool *need_relabel);
+static List *generate_partition_qual(Relation rel, bool recurse);
+
+static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
+                                        List *datums, bool lower);
+static int32 partition_rbound_cmp(PartitionKey key,
+                                        Datum *datums1, RangeDatumContent *content1, bool lower1,
+                                        PartitionRangeBound *b2);
+static int32 partition_rbound_datum_cmp(PartitionKey key,
+                                                  Datum *rb_datums, RangeDatumContent *rb_content,
+                                                  Datum *tuple_datums);
+
+static int32 partition_bound_cmp(PartitionKey key,
+                                       PartitionBoundInfo boundinfo,
+                                       int offset, void *probe, bool probe_is_bound);
+static int partition_bound_bsearch(PartitionKey key,
+                                               PartitionBoundInfo boundinfo,
+                                               void *probe, bool probe_is_bound, bool *is_equal);
+
+/* Support get_partition_for_tuple() */
+static void FormPartitionKeyDatum(PartitionDispatch pd,
+                                         TupleTableSlot *slot,
+                                         EState *estate,
+                                         Datum *values,
+                                         bool *isnull);
+
+/*
+ * RelationBuildPartitionDesc
+ *             Form rel's partition descriptor
+ *
+ * Not flushed from the cache by RelationClearRelation() unless changed because
+ * of addition or removal of partition.
+ */
+void
+RelationBuildPartitionDesc(Relation rel)
+{
+       List       *inhoids,
+                          *partoids;
+       Oid                *oids = NULL;
+       List       *boundspecs = NIL;
+       ListCell   *cell;
+       int                     i,
+                               nparts;
+       PartitionKey key = RelationGetPartitionKey(rel);
+       PartitionDesc result;
+       MemoryContext oldcxt;
+
+       int                     ndatums = 0;
+
+       /* List partitioning specific */
+       PartitionListValue **all_values = NULL;
+       bool            found_null = false;
+       int                     null_index = -1;
+
+       /* Range partitioning specific */
+       PartitionRangeBound **rbounds = NULL;
+
+       /*
+        * The following could happen in situations where rel has a pg_class entry
+        * but not the pg_partitioned_table entry yet.  In that case nothing is
+        * built and rel->rd_partdesc is left untouched.
+        */
+       if (key == NULL)
+               return;
+
+       /*
+        * Phase 1: read the bound specification of every partition.
+        *
+        * Get partition oids from pg_inherits, then collect the bound spec node
+        * and the OID of each child that is already marked as a partition.  The
+        * two lists (boundspecs, partoids) are kept in the same order.
+        */
+       inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
+
+       partoids = NIL;
+       foreach(cell, inhoids)
+       {
+               Oid                     inhrelid = lfirst_oid(cell);
+               HeapTuple       tuple;
+               Datum           datum;
+               bool            isnull;
+               Node       *boundspec;
+
+               tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(inhrelid));
+
+               /* Do not dereference the result of a failed cache lookup. */
+               if (!HeapTupleIsValid(tuple))
+                       elog(ERROR, "cache lookup failed for relation %u", inhrelid);
+
+               /*
+                * It is possible that the pg_class tuple of a partition has not been
+                * updated yet to set its relpartbound field.  The only case where
+                * this happens is when we open the parent relation to check using its
+                * partition descriptor that a new partition's bound does not overlap
+                * some existing partition.
+                */
+               if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
+               {
+                       ReleaseSysCache(tuple);
+                       continue;
+               }
+
+               datum = SysCacheGetAttr(RELOID, tuple,
+                                                               Anum_pg_class_relpartbound,
+                                                               &isnull);
+               Assert(!isnull);
+               boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
+               boundspecs = lappend(boundspecs, boundspec);
+               partoids = lappend_oid(partoids, inhrelid);
+               ReleaseSysCache(tuple);
+       }
+
+       nparts = list_length(partoids);
+
+       /*
+        * Phase 2: convert the bound spec nodes into sorted working arrays
+        * (all_values for list partitioning, rbounds for range partitioning).
+        * These are allocated in the caller's memory context; only phase 3
+        * allocates in the relcache context.
+        */
+       if (nparts > 0)
+       {
+               oids = (Oid *) palloc(nparts * sizeof(Oid));
+               i = 0;
+               foreach(cell, partoids)
+                       oids[i++] = lfirst_oid(cell);
+
+               /* Convert from node to the internal representation */
+               if (key->strategy == PARTITION_STRATEGY_LIST)
+               {
+                       List       *non_null_values = NIL;
+
+                       /*
+                        * Create a unified list of non-null values across all partitions.
+                        * Here i counts partitions, in boundspecs order.
+                        */
+                       i = 0;
+                       found_null = false;
+                       null_index = -1;
+                       foreach(cell, boundspecs)
+                       {
+                               ListCell   *c;
+                               PartitionBoundSpec *spec = lfirst(cell);
+
+                               if (spec->strategy != PARTITION_STRATEGY_LIST)
+                                       elog(ERROR, "invalid strategy in partition bound spec");
+
+                               foreach(c, spec->listdatums)
+                               {
+                                       Const      *val = lfirst(c);
+
+                                       if (!val->constisnull)
+                                       {
+                                               PartitionListValue *list_value;
+
+                                               list_value = (PartitionListValue *)
+                                                       palloc0(sizeof(PartitionListValue));
+                                               list_value->index = i;
+                                               list_value->value = val->constvalue;
+                                               non_null_values = lappend(non_null_values,
+                                                                                                 list_value);
+                                       }
+                                       else
+                                       {
+                                               /*
+                                                * Never put a null into the values array, flag
+                                                * instead for the code further down below where we
+                                                * construct the actual relcache struct.
+                                                */
+                                               if (found_null)
+                                                       elog(ERROR, "found null more than once");
+                                               found_null = true;
+                                               null_index = i;
+                                       }
+                               }
+
+                               i++;
+                       }
+
+                       ndatums = list_length(non_null_values);
+
+                       /*
+                        * Collect all list values in one array. Alongside the value, we
+                        * also save the index of partition the value comes from.  The
+                        * entries built above are referenced directly; no second copy
+                        * is needed since they are private to this function.
+                        */
+                       all_values = (PartitionListValue **) palloc(ndatums *
+                                                                                          sizeof(PartitionListValue *));
+                       i = 0;
+                       foreach(cell, non_null_values)
+                               all_values[i++] = (PartitionListValue *) lfirst(cell);
+
+                       qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
+                                         qsort_partition_list_value_cmp, (void *) key);
+               }
+               else if (key->strategy == PARTITION_STRATEGY_RANGE)
+               {
+                       int                     j,
+                                               k;
+                       PartitionRangeBound **all_bounds,
+                                          *prev;
+                       bool       *distinct_indexes;
+
+                       all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
+                                                                                         sizeof(PartitionRangeBound *));
+                       distinct_indexes = (bool *) palloc(2 * nparts * sizeof(bool));
+
+                       /*
+                        * Create a unified list of range bounds across all the
+                        * partitions: one lower and one upper bound per partition.
+                        */
+                       i = j = 0;
+                       foreach(cell, boundspecs)
+                       {
+                               PartitionBoundSpec *spec = lfirst(cell);
+                               PartitionRangeBound *lower,
+                                                  *upper;
+
+                               if (spec->strategy != PARTITION_STRATEGY_RANGE)
+                                       elog(ERROR, "invalid strategy in partition bound spec");
+
+                               lower = make_one_range_bound(key, i, spec->lowerdatums,
+                                                                                        true);
+                               upper = make_one_range_bound(key, i, spec->upperdatums,
+                                                                                        false);
+                               all_bounds[j] = lower;
+                               all_bounds[j + 1] = upper;
+                               j += 2;
+                               i++;
+                       }
+                       Assert(j == 2 * nparts);
+
+                       /* Sort all the bounds in ascending order */
+                       qsort_arg(all_bounds, 2 * nparts,
+                                         sizeof(PartitionRangeBound *),
+                                         qsort_partition_rbound_cmp,
+                                         (void *) key);
+
+                       /*
+                        * Count the number of distinct bounds to allocate an array of
+                        * that size.
+                        */
+                       ndatums = 0;
+                       prev = NULL;
+                       for (i = 0; i < 2 * nparts; i++)
+                       {
+                               PartitionRangeBound *cur = all_bounds[i];
+                               bool            is_distinct = false;
+
+                               /* Is the current bound distinct from the previous one? */
+                               for (j = 0; j < key->partnatts; j++)
+                               {
+                                       Datum           cmpval;
+
+                                       /* The very first bound is distinct by definition. */
+                                       if (prev == NULL)
+                                       {
+                                               is_distinct = true;
+                                               break;
+                                       }
+
+                                       /*
+                                        * If either of them has infinite element, we can't equate
+                                        * them.  Even when both are infinite, they'd have
+                                        * opposite signs, because only one of cur and prev is a
+                                        * lower bound.
+                                        */
+                                       if (cur->content[j] != RANGE_DATUM_FINITE ||
+                                               prev->content[j] != RANGE_DATUM_FINITE)
+                                       {
+                                               is_distinct = true;
+                                               break;
+                                       }
+                                       cmpval = FunctionCall2Coll(&key->partsupfunc[j],
+                                                                                          key->partcollation[j],
+                                                                                          cur->datums[j],
+                                                                                          prev->datums[j]);
+                                       if (DatumGetInt32(cmpval) != 0)
+                                       {
+                                               is_distinct = true;
+                                               break;
+                                       }
+                               }
+
+                               /*
+                                * Count the current bound if it is distinct from the previous
+                                * one.  Also, store if the index i contains a distinct bound
+                                * that we'd like to put in the relcache array.
+                                */
+                               if (is_distinct)
+                               {
+                                       distinct_indexes[i] = true;
+                                       ndatums++;
+                               }
+                               else
+                                       distinct_indexes[i] = false;
+
+                               prev = cur;
+                       }
+
+                       /*
+                        * Finally save them in an array from where they will be copied
+                        * into the relcache.
+                        */
+                       rbounds = (PartitionRangeBound **) palloc(ndatums *
+                                                                                         sizeof(PartitionRangeBound *));
+                       k = 0;
+                       for (i = 0; i < 2 * nparts; i++)
+                       {
+                               if (distinct_indexes[i])
+                                       rbounds[k++] = all_bounds[i];
+                       }
+                       Assert(k == ndatums);
+               }
+               else
+                       elog(ERROR, "unexpected partition strategy: %d",
+                                (int) key->strategy);
+       }
+
+       /*
+        * Phase 3: build the actual relcache partition descriptor in its own
+        * memory context, a child of CacheMemoryContext stored in rel->rd_pdcxt.
+        */
+       rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
+                                                                                 RelationGetRelationName(rel),
+                                                                                 ALLOCSET_DEFAULT_SIZES);
+       oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
+
+       result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
+       result->nparts = nparts;
+       if (nparts > 0)
+       {
+               PartitionBoundInfo boundinfo;
+               int                *mapping;
+               int                     next_index = 0;
+
+               result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
+
+               boundinfo = (PartitionBoundInfoData *)
+                       palloc0(sizeof(PartitionBoundInfoData));
+               boundinfo->strategy = key->strategy;
+               boundinfo->ndatums = ndatums;
+               boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+
+               /*
+                * mapping[] translates a partition's position in boundspecs/oids
+                * order into its canonical position.  Initialize it with invalid
+                * values.
+                */
+               mapping = (int *) palloc(sizeof(int) * nparts);
+               for (i = 0; i < nparts; i++)
+                       mapping[i] = -1;
+
+               switch (key->strategy)
+               {
+                       case PARTITION_STRATEGY_LIST:
+                               {
+                                       boundinfo->has_null = found_null;
+                                       boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
+
+                                       /*
+                                        * Copy values.  Indexes of individual values are mapped
+                                        * to canonical values so that they match for any two list
+                                        * partitioned tables with same number of partitions and
+                                        * same lists per partition.  One way to canonicalize is
+                                        * to assign the index in all_values[] of the smallest
+                                        * value of each partition, as the index of all of the
+                                        * partition's values.
+                                        */
+                                       for (i = 0; i < ndatums; i++)
+                                       {
+                                               boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
+                                               boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
+                                                                                                               key->parttypbyval[0],
+                                                                                                                key->parttyplen[0]);
+
+                                               /* If the old index has no mapping, assign one */
+                                               if (mapping[all_values[i]->index] == -1)
+                                                       mapping[all_values[i]->index] = next_index++;
+
+                                               boundinfo->indexes[i] = mapping[all_values[i]->index];
+                                       }
+
+                                       /*
+                                        * If null-accepting partition has no mapped index yet,
+                                        * assign one.  This could happen if such partition
+                                        * accepts only null and hence not covered in the above
+                                        * loop which only handled non-null values.
+                                        */
+                                       if (found_null)
+                                       {
+                                               Assert(null_index >= 0);
+                                               if (mapping[null_index] == -1)
+                                                       mapping[null_index] = next_index++;
+                                       }
+
+                                       /* All partitions must now have a valid mapping */
+                                       Assert(next_index == nparts);
+
+                                       if (found_null)
+                                               boundinfo->null_index = mapping[null_index];
+                                       else
+                                               boundinfo->null_index = -1;
+                                       break;
+                               }
+
+                       case PARTITION_STRATEGY_RANGE:
+                               {
+                                       boundinfo->content = (RangeDatumContent **) palloc(ndatums *
+                                                                                               sizeof(RangeDatumContent *));
+                                       /* One more index than bounds: see the final slot below */
+                                       boundinfo->indexes = (int *) palloc((ndatums + 1) *
+                                                                                                               sizeof(int));
+
+                                       for (i = 0; i < ndatums; i++)
+                                       {
+                                               int                     j;
+
+                                               /*
+                                                * Zero the datum array: slots belonging to infinite
+                                                * bounds are never assigned below, and must not be
+                                                * left holding garbage.
+                                                */
+                                               boundinfo->datums[i] = (Datum *) palloc0(key->partnatts *
+                                                                                                                         sizeof(Datum));
+                                               boundinfo->content[i] = (RangeDatumContent *)
+                                                       palloc(key->partnatts *
+                                                                  sizeof(RangeDatumContent));
+                                               for (j = 0; j < key->partnatts; j++)
+                                               {
+                                                       if (rbounds[i]->content[j] == RANGE_DATUM_FINITE)
+                                                               boundinfo->datums[i][j] =
+                                                                       datumCopy(rbounds[i]->datums[j],
+                                                                                         key->parttypbyval[j],
+                                                                                         key->parttyplen[j]);
+                                                       /* Remember, we are storing the tri-state value. */
+                                                       boundinfo->content[i][j] = rbounds[i]->content[j];
+                                               }
+
+                                               /*
+                                                * There is no mapping for invalid indexes.
+                                                *
+                                                * Any lower bounds in the rbounds array have invalid
+                                                * indexes assigned, because the values between the
+                                                * previous bound (if there is one) and this (lower)
+                                                * bound are not part of the range of any existing
+                                                * partition.
+                                                */
+                                               if (rbounds[i]->lower)
+                                                       boundinfo->indexes[i] = -1;
+                                               else
+                                               {
+                                                       int                     orig_index = rbounds[i]->index;
+
+                                                       /* If the old index has no mapping, assign one */
+                                                       if (mapping[orig_index] == -1)
+                                                               mapping[orig_index] = next_index++;
+
+                                                       boundinfo->indexes[i] = mapping[orig_index];
+                                               }
+                                       }
+                                       /* Here i == ndatums: the extra, trailing slot is invalid */
+                                       boundinfo->indexes[i] = -1;
+                                       break;
+                               }
+
+                       default:
+                               elog(ERROR, "unexpected partition strategy: %d",
+                                        (int) key->strategy);
+               }
+
+               result->boundinfo = boundinfo;
+
+               /*
+                * Now assign OIDs from the original array into mapped indexes of the
+                * result array.  Order of OIDs in the former is defined by the
+                * catalog scan that retrieved them, whereas that in the latter is
+                * defined by canonicalized representation of the list values or the
+                * range bounds.
+                */
+               for (i = 0; i < nparts; i++)
+                       result->oids[mapping[i]] = oids[i];
+               pfree(mapping);
+       }
+
+       MemoryContextSwitchTo(oldcxt);
+       rel->rd_partdesc = result;
+}
+
+/*
+ * partition_bounds_equal
+ *
+ * Are two partition bound collections logically equal?
+ *
+ * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
+ * This is also useful when b1 and b2 are bound collections of two separate
+ * relations, respectively, because PartitionBoundInfo is a canonical
+ * representation of partition bounds.
+ *
+ * 'key' supplies the number of partitioning columns together with the
+ * comparison support function and collation used for each of them.
+ *
+ * Returns true only if strategy, number of datums, null handling, every
+ * datum (and, for range partitioning, every datum's finite/infinite state)
+ * and every partition index match.
+ */
+bool
+partition_bounds_equal(PartitionKey key,
+                                          PartitionBoundInfo b1, PartitionBoundInfo b2)
+{
+       int                     i;
+
+       if (b1->strategy != b2->strategy)
+               return false;
+
+       if (b1->ndatums != b2->ndatums)
+               return false;
+
+       if (b1->has_null != b2->has_null)
+               return false;
+
+       if (b1->null_index != b2->null_index)
+               return false;
+
+       for (i = 0; i < b1->ndatums; i++)
+       {
+               int                     j;
+
+               for (j = 0; j < key->partnatts; j++)
+               {
+                       int32           cmpval;
+
+                       /* For range partitions, the bounds might not be finite. */
+                       if (b1->content != NULL)
+                       {
+                               /*
+                                * A finite bound always differs from an infinite bound, and
+                                * different kinds of infinities differ from each other.
+                                */
+                               if (b1->content[i][j] != b2->content[i][j])
+                                       return false;
+
+                               /*
+                                * Non-finite bounds carry no meaningful datum, so they must
+                                * not be handed to the comparison function; they are equal
+                                * without further examination.
+                                */
+                               if (b1->content[i][j] != RANGE_DATUM_FINITE)
+                                       continue;
+                       }
+
+                       cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[j],
+                                                                                                        key->partcollation[j],
+                                                                                                        b1->datums[i][j],
+                                                                                                        b2->datums[i][j]));
+                       if (cmpval != 0)
+                               return false;
+               }
+
+               if (b1->indexes[i] != b2->indexes[i])
+                       return false;
+       }
+
+       /*
+        * There are ndatums+1 indexes in case of range partitions; i equals
+        * ndatums here, so this compares the extra trailing entry.
+        */
+       if (key->strategy == PARTITION_STRATEGY_RANGE &&
+               b1->indexes[i] != b2->indexes[i])
+               return false;
+
+       return true;
+}
+
+/*
+ * check_new_partition_bound
+ *
+ * Checks if the new partition's bound overlaps any of the existing partitions
+ * of parent.  Also performs additional checks as necessary per strategy.
+ *
+ * 'relname' is the name of the new partition; it is only used in the error
+ * message.  'bound' is the new partition's PartitionBoundSpec.
+ *
+ * Reports an ERROR if a range partition's bounds describe an empty range, or
+ * if the new bound overlaps an existing partition; otherwise returns normally.
+ */
+void
+check_new_partition_bound(char *relname, Relation parent, Node *bound)
+{
+       PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
+       PartitionKey key = RelationGetPartitionKey(parent);
+       PartitionDesc partdesc = RelationGetPartitionDesc(parent);
+       ParseState *pstate = make_parsestate(NULL);
+       int                     with = -1;      /* index of the overlapped partition */
+       bool            overlap = false;
+
+       switch (key->strategy)
+       {
+               case PARTITION_STRATEGY_LIST:
+                       {
+                               Assert(spec->strategy == PARTITION_STRATEGY_LIST);
+
+                               if (partdesc->nparts > 0)
+                               {
+                                       PartitionBoundInfo boundinfo = partdesc->boundinfo;
+                                       ListCell   *cell;
+
+                                       Assert(boundinfo &&
+                                                  boundinfo->strategy == PARTITION_STRATEGY_LIST &&
+                                                  (boundinfo->ndatums > 0 || boundinfo->has_null));
+
+                                       /*
+                                        * The new partition overlaps as soon as any one of its
+                                        * values (or NULL) is already accepted by an existing
+                                        * partition.
+                                        */
+                                       foreach(cell, spec->listdatums)
+                                       {
+                                               Const      *val = lfirst(cell);
+
+                                               if (!val->constisnull)
+                                               {
+                                                       int                     offset;
+                                                       bool            equal;
+
+                                                       offset = partition_bound_bsearch(key, boundinfo,
+                                                                                                                        &val->constvalue,
+                                                                                                                        true, &equal);
+                                                       if (offset >= 0 && equal)
+                                                       {
+                                                               overlap = true;
+                                                               with = boundinfo->indexes[offset];
+                                                               break;
+                                                       }
+                                               }
+                                               else if (boundinfo->has_null)
+                                               {
+                                                       overlap = true;
+                                                       with = boundinfo->null_index;
+                                                       break;
+                                               }
+                                       }
+                               }
+
+                               break;
+                       }
+
+               case PARTITION_STRATEGY_RANGE:
+                       {
+                               PartitionRangeBound *lower,
+                                                  *upper;
+
+                               Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
+                               lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
+                               upper = make_one_range_bound(key, -1, spec->upperdatums, false);
+
+                               /*
+                                * First check if the resulting range would be empty with
+                                * specified lower and upper bounds
+                                */
+                               if (partition_rbound_cmp(key, lower->datums, lower->content, true,
+                                                                                upper) >= 0)
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                       errmsg("cannot create range partition with empty range"),
+                                                        parser_errposition(pstate, spec->location)));
+
+                               if (partdesc->nparts > 0)
+                               {
+                                       PartitionBoundInfo boundinfo = partdesc->boundinfo;
+                                       int                     off1,
+                                                               off2;
+                                       bool            equal = false;
+
+                                       Assert(boundinfo && boundinfo->ndatums > 0 &&
+                                                  boundinfo->strategy == PARTITION_STRATEGY_RANGE);
+
+                                       /*
+                                        * Find the greatest index of a range bound that is less
+                                        * than or equal with the new lower bound.
+                                        */
+                                       off1 = partition_bound_bsearch(key, boundinfo, lower, true,
+                                                                                                  &equal);
+
+                                       /*
+                                        * If equal has been set to true, that means the new lower
+                                        * bound is found to be equal with the bound at off1,
+                                        * which clearly means an overlap with the partition at
+                                        * index off1+1.
+                                        *
+                                        * Otherwise, check if there is a "gap" that could be
+                                        * occupied by the new partition.  In case of a gap, the
+                                        * new upper bound should not cross past the upper
+                                        * boundary of the gap, that is, off2 == off1 should be
+                                        * true.
+                                        */
+                                       if (!equal && boundinfo->indexes[off1 + 1] < 0)
+                                       {
+                                               off2 = partition_bound_bsearch(key, boundinfo, upper,
+                                                                                                          true, &equal);
+
+                                               if (equal || off1 != off2)
+                                               {
+                                                       overlap = true;
+
+                                                       /*
+                                                        * indexes[] holds a partition index for an upper
+                                                        * bound and -1 for a lower bound.  If the bound
+                                                        * at off2 is an upper bound, it names the
+                                                        * overlapped partition directly.  Otherwise it is
+                                                        * the lower bound of the overlapped partition,
+                                                        * whose index is stored with the next bound.
+                                                        * Unconditionally using off2 + 1 would yield -1
+                                                        * in the former case.
+                                                        */
+                                                       if (off2 >= 0 && boundinfo->indexes[off2] >= 0)
+                                                               with = boundinfo->indexes[off2];
+                                                       else
+                                                               with = boundinfo->indexes[off2 + 1];
+                                               }
+                                       }
+                                       else
+                                       {
+                                               overlap = true;
+                                               with = boundinfo->indexes[off1 + 1];
+                                       }
+                               }
+
+                               break;
+                       }
+
+               default:
+                       elog(ERROR, "unexpected partition strategy: %d",
+                                (int) key->strategy);
+       }
+
+       if (overlap)
+       {
+               Assert(with >= 0);
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                errmsg("partition \"%s\" would overlap partition \"%s\"",
+                                               relname, get_rel_name(partdesc->oids[with])),
+                                parser_errposition(pstate, spec->location)));
+       }
+}
+
+/*
+ * get_partition_parent
+ *
+ * Returns inheritance parent of a partition by scanning pg_inherits
+ *
+ * The scan looks for the pg_inherits row with inhrelid = relid and
+ * inhseqno = 1, and returns that row's inhparent.  An ERROR is reported if
+ * no such row exists.
+ *
+ * Note: Because this function assumes that the relation whose OID is passed
+ * as an argument will have precisely one parent, it should only be called
+ * when it is known that the relation is a partition.
+ */
+Oid
+get_partition_parent(Oid relid)
+{
+       Form_pg_inherits form;
+       Relation        catalogRelation;
+       SysScanDesc scan;
+       ScanKeyData key[2];
+       HeapTuple       tuple;
+       Oid                     result;
+
+       catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
+
+       ScanKeyInit(&key[0],
+                               Anum_pg_inherits_inhrelid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(relid));
+       ScanKeyInit(&key[1],
+                               Anum_pg_inherits_inhseqno,
+                               BTEqualStrategyNumber, F_INT4EQ,
+                               Int32GetDatum(1));
+
+       scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
+                                                         NULL, 2, key);
+
+       /*
+        * Report a missing row as an error rather than merely asserting, so that
+        * builds without assertions do not dereference an invalid tuple below.
+        */
+       tuple = systable_getnext(scan);
+       if (!HeapTupleIsValid(tuple))
+               elog(ERROR, "could not find tuple for parent of relation %u", relid);
+
+       form = (Form_pg_inherits) GETSTRUCT(tuple);
+       result = form->inhparent;
+
+       systable_endscan(scan);
+       heap_close(catalogRelation, AccessShareLock);
+
+       return result;
+}
+
+/*
+ * get_qual_from_partbound
+ *             Build the partition constraint implied by a partition bound
+ *
+ * 'bound' is the parser node (a PartitionBoundSpec) describing the bound of
+ * partition 'rel' under partitioned table 'parent'.  The result is a list of
+ * executable expressions whose Vars carry the attribute numbers of 'rel'.
+ */
+List *
+get_qual_from_partbound(Relation rel, Relation parent, Node *bound)
+{
+       PartitionBoundSpec *boundspec = (PartitionBoundSpec *) bound;
+       PartitionKey partkey = RelationGetPartitionKey(parent);
+       TupleDesc       parentdesc = RelationGetDescr(parent);
+       int                     natts = parentdesc->natts;
+       List       *qual = NIL;
+       AttrNumber *attmap;
+       bool            has_whole_row;
+       int                     idx;
+
+       Assert(partkey != NULL);
+
+       /* Let the strategy-specific routine produce the raw expressions */
+       if (partkey->strategy == PARTITION_STRATEGY_LIST)
+       {
+               Assert(boundspec->strategy == PARTITION_STRATEGY_LIST);
+               qual = get_qual_for_list(partkey, boundspec);
+       }
+       else if (partkey->strategy == PARTITION_STRATEGY_RANGE)
+       {
+               Assert(boundspec->strategy == PARTITION_STRATEGY_RANGE);
+               qual = get_qual_for_range(partkey, boundspec);
+       }
+       else
+               elog(ERROR, "unexpected partition strategy: %d",
+                        (int) partkey->strategy);
+
+       /*
+        * The expressions built above reference columns by the parent's
+        * attribute numbers, since that is what the partition key records.  The
+        * partition may number the same columns differently, so build a
+        * parent-attno -> partition-attno map, matching columns by name.
+        * Entries for dropped parent columns are left as zero.
+        */
+       attmap = (AttrNumber *) palloc0(natts * sizeof(AttrNumber));
+       for (idx = 0; idx < natts; idx++)
+       {
+               Form_pg_attribute parentatt = parentdesc->attrs[idx];
+
+               if (!parentatt->attisdropped)
+                       attmap[idx] = get_attnum(RelationGetRelid(rel),
+                                                                        NameStr(parentatt->attname));
+       }
+
+       /* Renumber the Vars (varno 1, level 0) using the map */
+       qual = (List *) map_variable_attnos((Node *) qual,
+                                                                               1, 0,
+                                                                               attmap,
+                                                                               natts,
+                                                                               &has_whole_row);
+
+       /* a partition key cannot contain a whole-row reference */
+       if (has_whole_row)
+               elog(ERROR, "unexpected whole-row reference found in partition key");
+
+       return qual;
+}
+
+/*
+ * RelationGetPartitionQual
+ *
+ * Returns the list of partition quals for rel, or NIL when rel is not a
+ * partition.  The recurse flag is handed through unchanged to
+ * generate_partition_qual().
+ */
+List *
+RelationGetPartitionQual(Relation rel, bool recurse)
+{
+       /* Only partitions have a partition constraint */
+       return rel->rd_rel->relispartition
+               ? generate_partition_qual(rel, recurse)
+               : NIL;
+}
+
+/* Turn an array of OIDs with N elements into a list */
+#define OID_ARRAY_TO_LIST(arr, N, list) \
+       do\
+       {\
+               int             i;\
+               for (i = 0; i < (N); i++)\
+                       (list) = lappend_oid((list), (arr)[i]);\
+       } while(0)
+
+/*
+ * RelationGetPartitionDispatchInfo
+ *             Returns information necessary to route tuples down a partition tree
+ *
+ * All the partitions will be locked with lockmode, unless it is NoLock.
+ * A list of the OIDs of all the leaf partitions of rel is returned in
+ * *leaf_part_oids, and the number of partitioned tables in the tree
+ * (counting rel itself) is returned in *num_parted.
+ *
+ * The result is an array of *num_parted PartitionDispatch objects, with rel's
+ * own entry first.  In each object, indexes[j] describes the j'th partition
+ * of that table: for a leaf partition it is the partition's position in
+ * *leaf_part_oids (>= 0); for a partitioned child it is the negated position
+ * of the child's own PartitionDispatch object in the returned array.
+ */
+PartitionDispatch *
+RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
+                                                                int *num_parted, List **leaf_part_oids)
+{
+       PartitionDesc rootpartdesc = RelationGetPartitionDesc(rel);
+       PartitionDispatchData **pd;
+       List       *all_parts = NIL,
+                          *parted_rels;
+       ListCell   *lc;
+       int                     i,
+                               k,
+                               offset;
+
+       /*
+        * Lock partitions and make a list of the partitioned ones to prepare
+        * their PartitionDispatch objects below.
+        *
+        * Cannot use find_all_inheritors() here, because then the order of OIDs
+        * in parted_rels list would be unknown, which does not help, because we
+        * assign indexes within individual PartitionDispatch in an order that is
+        * predetermined (determined by the order of OIDs in individual partition
+        * descriptors).
+        */
+       *num_parted = 1;
+       parted_rels = list_make1(rel);
+       OID_ARRAY_TO_LIST(rootpartdesc->oids, rootpartdesc->nparts, all_parts);
+       foreach(lc, all_parts)
+       {
+               Relation        partrel = heap_open(lfirst_oid(lc), lockmode);
+               PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
+
+               /*
+                * If this partition is a partitioned table, add its children to the
+                * end of the list, so that they are processed as well.
+                */
+               if (partdesc)
+               {
+                       (*num_parted)++;
+                       parted_rels = lappend(parted_rels, partrel);
+                       OID_ARRAY_TO_LIST(partdesc->oids, partdesc->nparts, all_parts);
+               }
+               else
+                       heap_close(partrel, NoLock);
+
+               /*
+                * We keep the partitioned ones open until we're done using the
+                * information being collected here (for example, see
+                * ExecEndModifyTable).
+                */
+       }
+
+       /*
+        * Generate PartitionDispatch objects for all partitioned tables.
+        *
+        * i is the position in pd[] of the table being processed, k counts the
+        * leaf partitions seen so far, and offset counts the partitioned children
+        * found under all previously processed tables.
+        */
+       pd = (PartitionDispatchData **) palloc(*num_parted *
+                                                                                  sizeof(PartitionDispatchData *));
+       *leaf_part_oids = NIL;
+       i = k = offset = 0;
+       foreach(lc, parted_rels)
+       {
+               Relation        partrel = lfirst(lc);
+               PartitionKey partkey = RelationGetPartitionKey(partrel);
+               PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
+               int                     j,
+                                       m;
+
+               pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
+               pd[i]->reldesc = partrel;
+               pd[i]->key = partkey;
+               pd[i]->keystate = NIL;
+               pd[i]->partdesc = partdesc;
+               pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
+
+               /* m counts the partitioned children of this table seen so far */
+               m = 0;
+               for (j = 0; j < partdesc->nparts; j++)
+               {
+                       Oid                     partrelid = partdesc->oids[j];
+
+                       if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
+                       {
+                               *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
+                               pd[i]->indexes[j] = k++;
+                       }
+                       else
+                       {
+                               /*
+                                * parted_rels holds the root followed by every
+                                * partitioned child in exactly the order in which this
+                                * loop encounters them, so this child sits right after
+                                * the root and the (offset + m) partitioned children
+                                * seen before it.  Using the parent's own position i
+                                * instead of offset is only correct for the root: as
+                                * soon as a table has a partitioned sibling, its
+                                * children would be pointed at that sibling.
+                                */
+                               pd[i]->indexes[j] = -(1 + offset + m);
+                               m++;
+                       }
+               }
+               i++;
+
+               /* Account for the partitioned children of the table just done */
+               offset += m;
+       }
+
+       return pd;
+}
+
+/* Module-local functions */
+
+/*
+ * get_qual_for_list
+ *
+ * Returns a list of expressions to use as a list partition's constraint.
+ *
+ * If the bound's value list contains no NULL, the result is the two-element
+ * (implicitly AND'ed) list
+ *             key IS NOT NULL, key = ANY (ARRAY[values])
+ * otherwise it is the one-element list
+ *             (key IS NULL OR key = ANY (ARRAY[non-null values]))
+ *
+ * 'spec' is not modified: the array elements are copies of its non-null
+ * datums.
+ */
+static List *
+get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
+{
+       List       *result;
+       List       *arrelems = NIL;
+       ArrayExpr  *arr;
+       ScalarArrayOpExpr *opexpr;
+       NullTest   *nulltest;
+       ListCell   *cell;
+       Node       *keyCol;
+       Oid                     operoid;
+       bool            need_relabel,
+                               list_has_null = false;
+
+       /* Left operand is either a simple Var or arbitrary expression */
+       if (key->partattrs[0] != 0)
+               keyCol = (Node *) makeVar(1,
+                                                                 key->partattrs[0],
+                                                                 key->parttypid[0],
+                                                                 key->parttypmod[0],
+                                                                 key->parttypcoll[0],
+                                                                 0);
+       else
+               keyCol = (Node *) copyObject(linitial(key->partexprs));
+
+       /*
+        * Collect the non-null values for the array; a NULL value is handled
+        * separately below.  Build a list of our own rather than deleting cells
+        * from spec->listdatums, which belongs to the caller.
+        */
+       foreach(cell, spec->listdatums)
+       {
+               Const      *val = (Const *) lfirst(cell);
+
+               if (val->constisnull)
+                       list_has_null = true;
+               else
+                       arrelems = lappend(arrelems, copyObject(val));
+       }
+
+       /*
+        * Gin up either a col IS NOT NULL test that will be AND'd with the
+        * array test, or a col IS NULL test that will be OR'd with it.
+        */
+       nulltest = makeNode(NullTest);
+       nulltest->arg = (Expr *) keyCol;
+       nulltest->nulltesttype = list_has_null ? IS_NULL : IS_NOT_NULL;
+       nulltest->argisrow = false;
+       nulltest->location = -1;
+
+       /*
+        * Right operand is an ArrayExpr containing this partition's values.
+        *
+        * NOTE(review): when the key type is itself an array type, array_typeid
+        * and element_typeid both end up being the key type while multidims is
+        * false -- confirm whether array-typed list keys are meant to work.
+        */
+       arr = makeNode(ArrayExpr);
+       arr->array_typeid = !type_is_array(key->parttypid[0])
+               ? get_array_type(key->parttypid[0])
+               : key->parttypid[0];
+       arr->array_collid = key->parttypcoll[0];
+       arr->element_typeid = key->parttypid[0];
+       arr->elements = arrelems;
+       arr->multidims = false;
+       arr->location = -1;
+
+       /* Get the correct btree equality operator */
+       operoid = get_partition_operator(key, 0, BTEqualStrategyNumber,
+                                                                        &need_relabel);
+       if (need_relabel || key->partcollation[0] != key->parttypcoll[0])
+               keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+                                                                                 key->partopcintype[0],
+                                                                                 -1,
+                                                                                 key->partcollation[0],
+                                                                                 COERCE_EXPLICIT_CAST);
+
+       /* Build leftop = ANY (rightop) */
+       opexpr = makeNode(ScalarArrayOpExpr);
+       opexpr->opno = operoid;
+       opexpr->opfuncid = get_opcode(operoid);
+       opexpr->useOr = true;
+       opexpr->inputcollid = key->partcollation[0];
+       opexpr->args = list_make2(keyCol, arr);
+       opexpr->location = -1;
+
+       if (list_has_null)
+       {
+               Expr       *or;
+
+               or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
+               result = list_make1(or);
+       }
+       else
+               result = list_make2(nulltest, opexpr);
+
+       return result;
+}
+
+/*
+ * make_range_key_operand
+ *
+ * Returns the expression to use as the left operand of a range constraint
+ * operator for key column 'col': keyCol itself, or keyCol wrapped in a
+ * RelabelType when the operator lookup asked for one or when the partitioning
+ * collation differs from the column type's collation.  keyCol is not
+ * modified.
+ */
+static Node *
+make_range_key_operand(PartitionKey key, int col, Node *keyCol,
+                                          bool need_relabel)
+{
+       if (need_relabel || key->partcollation[col] != key->parttypcoll[col])
+               return (Node *) makeRelabelType((Expr *) keyCol,
+                                                                               key->partopcintype[col],
+                                                                               -1,
+                                                                               key->partcollation[col],
+                                                                               COERCE_EXPLICIT_CAST);
+
+       return keyCol;
+}
+
+/*
+ * get_qual_for_range
+ *
+ * Get a list of OpExpr's to use as a range partition's constraint.
+ *
+ * Key columns are walked left to right.  While the lower and upper bounds of
+ * a column are finite and equal, (keyCol = bound) is emitted and the next
+ * column is considered.  At the first column where that is not the case,
+ * (keyCol >= lower) and/or (keyCol < upper) are emitted for whichever bounds
+ * are finite, and the walk stops.  An IS NOT NULL test is additionally
+ * emitted for every visited key column that is an expression rather than a
+ * plain Var.
+ */
+static List *
+get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
+{
+       List       *result = NIL;
+       ListCell   *cell1,
+                          *cell2,
+                          *partexprs_item;
+       int                     i;
+
+       /*
+        * Iterate over columns of the key, emitting an OpExpr for each using the
+        * corresponding lower and upper datums as constant operands.
+        */
+       i = 0;
+       partexprs_item = list_head(key->partexprs);
+       forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
+       {
+               PartitionRangeDatum *ldatum = lfirst(cell1),
+                                  *udatum = lfirst(cell2);
+               Node       *keyCol;
+               Const      *lower_val = NULL,
+                                  *upper_val = NULL;
+               EState     *estate;
+               MemoryContext oldcxt;
+               Expr       *test_expr;
+               ExprState  *test_exprstate;
+               Datum           test_result;
+               bool            isNull;
+               bool            need_relabel = false;
+               Oid                     operoid;
+               NullTest   *nulltest;
+
+               /* Left operand */
+               if (key->partattrs[i] != 0)
+               {
+                       keyCol = (Node *) makeVar(1,
+                                                                         key->partattrs[i],
+                                                                         key->parttypid[i],
+                                                                         key->parttypmod[i],
+                                                                         key->parttypcoll[i],
+                                                                         0);
+               }
+               else
+               {
+                       keyCol = (Node *) copyObject(lfirst(partexprs_item));
+                       partexprs_item = lnext(partexprs_item);
+               }
+
+               /*
+                * Emit a IS NOT NULL expression for non-Var keys, because whereas
+                * simple attributes are covered by NOT NULL constraints, expression
+                * keys are still nullable which is not acceptable in case of range
+                * partitioning.
+                */
+               if (!IsA(keyCol, Var))
+               {
+                       nulltest = makeNode(NullTest);
+                       nulltest->arg = (Expr *) keyCol;
+                       nulltest->nulltesttype = IS_NOT_NULL;
+                       nulltest->argisrow = false;
+                       nulltest->location = -1;
+                       result = lappend(result, nulltest);
+               }
+
+               /*
+                * Stop at this column if either of lower or upper datum is infinite,
+                * but do emit an OpExpr for the non-infinite datum.
+                */
+               if (!ldatum->infinite)
+                       lower_val = (Const *) ldatum->value;
+               if (!udatum->infinite)
+                       upper_val = (Const *) udatum->value;
+
+               /*
+                * If lower_val and upper_val are both finite and happen to be equal,
+                * emit only (keyCol = lower_val) for this column, because all rows in
+                * this partition could only ever contain this value (ie, lower_val)
+                * in the current partitioning column.  We must consider further
+                * columns because the above condition does not fully constrain the
+                * rows of this partition.
+                */
+               if (lower_val && upper_val)
+               {
+                       /* Get the correct btree equality operator for the test */
+                       operoid = get_partition_operator(key, i, BTEqualStrategyNumber,
+                                                                                        &need_relabel);
+
+                       /*
+                        * Create and evaluate (lower_val = upper_val) in a throwaway
+                        * executor state.  The boolean result is pass-by-value, so it
+                        * remains usable after the executor state is freed.
+                        */
+                       estate = CreateExecutorState();
+                       oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
+                       test_expr = make_opclause(operoid,
+                                                                         BOOLOID,
+                                                                         false,
+                                                                         (Expr *) lower_val,
+                                                                         (Expr *) upper_val,
+                                                                         InvalidOid,
+                                                                         key->partcollation[i]);
+                       fix_opfuncids((Node *) test_expr);
+                       test_exprstate = ExecInitExpr(test_expr, NULL);
+                       test_result = ExecEvalExprSwitchContext(test_exprstate,
+                                                                                         GetPerTupleExprContext(estate),
+                                                                                                       &isNull, NULL);
+                       MemoryContextSwitchTo(oldcxt);
+                       FreeExecutorState(estate);
+
+                       if (DatumGetBool(test_result))
+                       {
+                               /* This can never be, but it's better to make sure */
+                               if (i == key->partnatts - 1)
+                                       elog(ERROR, "invalid range bound specification");
+
+                               result = lappend(result,
+                                                                make_opclause(operoid,
+                                                                                          BOOLOID,
+                                                                                          false,
+                                                                                          (Expr *) make_range_key_operand(key, i,
+                                                                                                                                                          keyCol,
+                                                                                                                                                          need_relabel),
+                                                                                          (Expr *) lower_val,
+                                                                                          InvalidOid,
+                                                                                          key->partcollation[i]));
+
+                               /* Go over to consider the next column. */
+                               i++;
+                               continue;
+                       }
+               }
+
+               /*
+                * We can say here that lower_val != upper_val.  Emit expressions
+                * (keyCol >= lower_val) and (keyCol < upper_val), then stop.
+                *
+                * Each operand is derived from the unwrapped keyCol, so that when
+                * both bounds are finite the second expression does not end up with
+                * a RelabelType stacked on top of the first one's RelabelType.
+                */
+               if (lower_val)
+               {
+                       operoid = get_partition_operator(key, i,
+                                                                                        BTGreaterEqualStrategyNumber,
+                                                                                        &need_relabel);
+
+                       result = lappend(result,
+                                                        make_opclause(operoid,
+                                                                                  BOOLOID,
+                                                                                  false,
+                                                                                  (Expr *) make_range_key_operand(key, i,
+                                                                                                                                                  keyCol,
+                                                                                                                                                  need_relabel),
+                                                                                  (Expr *) lower_val,
+                                                                                  InvalidOid,
+                                                                                  key->partcollation[i]));
+               }
+
+               if (upper_val)
+               {
+                       operoid = get_partition_operator(key, i,
+                                                                                        BTLessStrategyNumber,
+                                                                                        &need_relabel);
+
+                       result = lappend(result,
+                                                        make_opclause(operoid,
+                                                                                  BOOLOID,
+                                                                                  false,
+                                                                                  (Expr *) make_range_key_operand(key, i,
+                                                                                                                                                  keyCol,
+                                                                                                                                                  need_relabel),
+                                                                                  (Expr *) upper_val,
+                                                                                  InvalidOid,
+                                                                                  key->partcollation[i]));
+               }
+
+               /*
+                * We can stop at this column, because we would not have checked the
+                * next column when routing a given row into this partition.
+                */
+               break;
+       }
+
+       return result;
+}
+
+/*
+ * get_partition_operator
+ *
+ * Look up, in the partitioning operator family of key column 'col', the
+ * operator implementing 'strategy', and return its OID.  *need_relabel is
+ * set to tell the caller whether the non-Const operand has to be wrapped in
+ * a RelabelType node.  Raises an error if no operator can be found.
+ */
+static Oid
+get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
+                                          bool *need_relabel)
+{
+       Oid                     opfamily = key->partopfamily[col];
+       Oid                     coltype = key->parttypid[col];
+       Oid                     result;
+
+       /*
+        * The preferred operator is the one that takes the column's own type on
+        * both sides.
+        */
+       result = get_opfamily_member(opfamily, coltype, coltype, strategy);
+
+       /*
+        * Failing that, fall back to the operator of the same family that takes
+        * the operator class's declared input type on both sides.  That is safe,
+        * because the operator class would not have been accepted for
+        * partitioning unless the column's type were binary-coercible to its
+        * input type.  The caller must then mark the implicit coercion with a
+        * RelabelType node, so that the resulting expression structurally
+        * matches similarly processed expressions within the optimizer.
+        */
+       *need_relabel = !OidIsValid(result);
+       if (*need_relabel)
+       {
+               Oid                     opcintype = key->partopcintype[col];
+
+               result = get_opfamily_member(opfamily, opcintype, opcintype,
+                                                                        strategy);
+       }
+
+       if (!OidIsValid(result))
+               elog(ERROR, "could not find operator for partitioning");
+
+       return result;
+}
+
+/*
+ * generate_partition_qual
+ *
+ * Generate partition predicate from rel's partition bound expression
+ *
+ * rel must be a partition.  The returned list holds rel's own partition
+ * constraint; if 'recurse' is true and rel's parent is itself a partition,
+ * the ancestors' constraints are placed in front of it.
+ *
+ * Only rel's own constraint is cached, in rel->rd_partcheck.  That copy is
+ * stored in CacheMemoryContext to ensure it survives as long as the relcache
+ * entry.  But we should be running in a less long-lived working context.  To
+ * avoid leaking cache memory if this routine fails partway through, we build
+ * in working memory and then copy the completed structure into cache memory.
+ */
+static List *
+generate_partition_qual(Relation rel, bool recurse)
+{
+       HeapTuple       tuple;
+       MemoryContext oldcxt;
+       Datum           boundDatum;
+       bool            isnull;
+       Node       *bound;
+       List       *my_qual = NIL,
+                          *result = NIL;
+       Relation        parent;
+
+       /* Guard against stack overflow due to overly deep partition tree */
+       check_stack_depth();
+
+       /*
+        * Verify that rel really is a partition before looking for its parent;
+        * get_partition_parent() must only be called for partitions.
+        */
+       if (!rel->rd_rel->relispartition)       /* should not happen */
+               elog(ERROR, "relation \"%s\" has relispartition = false",
+                        RelationGetRelationName(rel));
+
+       /* Grab at least an AccessShareLock on the parent table */
+       parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
+                                          AccessShareLock);
+
+       /* Quick copy */
+       if (rel->rd_partcheck)
+       {
+               if (parent->rd_rel->relispartition && recurse)
+                       result = list_concat(generate_partition_qual(parent, true),
+                                                                copyObject(rel->rd_partcheck));
+               else
+                       result = copyObject(rel->rd_partcheck);
+
+               /*
+                * NOTE(review): this path releases the parent lock immediately,
+                * whereas the path below keeps it until commit -- confirm that the
+                * difference is intended.
+                */
+               heap_close(parent, AccessShareLock);
+               return result;
+       }
+
+       /* Get pg_class.relpartbound */
+       tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(RelationGetRelid(rel)));
+       if (!HeapTupleIsValid(tuple))
+               elog(ERROR, "cache lookup failed for relation %u",
+                        RelationGetRelid(rel));
+       boundDatum = SysCacheGetAttr(RELOID, tuple,
+                                                                Anum_pg_class_relpartbound,
+                                                                &isnull);
+       if (isnull)                                     /* should not happen */
+               elog(ERROR, "relation \"%s\" has relpartbound = null",
+                        RelationGetRelationName(rel));
+       /* Convert to a node tree before releasing the syscache entry */
+       bound = stringToNode(TextDatumGetCString(boundDatum));
+       ReleaseSysCache(tuple);
+
+       my_qual = get_qual_from_partbound(rel, parent, bound);
+
+       /* If requested, add parent's quals to the list (if any) */
+       if (parent->rd_rel->relispartition && recurse)
+       {
+               List       *parent_check;
+
+               parent_check = generate_partition_qual(parent, true);
+               result = list_concat(parent_check, my_qual);
+       }
+       else
+               result = my_qual;
+
+       /* Save a copy of my_qual (not of the ancestors' quals) in the relcache */
+       oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+       rel->rd_partcheck = copyObject(my_qual);
+       MemoryContextSwitchTo(oldcxt);
+
+       /* Keep the parent locked until commit */
+       heap_close(parent, NoLock);
+
+       return result;
+}
+
+/* ----------------
+ *             FormPartitionKeyDatum
+ *                     Fill values[] and isnull[] with the partition key of a tuple.
+ *
+ *     pd                              partition dispatch object; supplies the partition
+ *                                     key and caches the prepared key expressions
+ *     slot                    slot holding the tuple to extract the key from
+ *     estate                  executor state for evaluating any partition key
+ *                                     expressions (must be non-NULL)
+ *     values                  Array of partition key Datums (output area)
+ *     isnull                  Array of is-null indicators (output area)
+ *
+ * The caller must have made the ecxt_scantuple slot of estate's per-tuple
+ * expr context point to 'slot'.
+ * ----------------
+ */
+static void
+FormPartitionKeyDatum(PartitionDispatch pd,
+                                         TupleTableSlot *slot,
+                                         EState *estate,
+                                         Datum *values,
+                                         bool *isnull)
+{
+       PartitionKey partkey = pd->key;
+       ListCell   *next_expr;
+       int                     col;
+
+       /* Prepare the key expressions the first time they are needed */
+       if (pd->keystate == NIL && partkey->partexprs != NIL)
+       {
+               /* The caller is responsible for having set up the context */
+               Assert(estate != NULL &&
+                          GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
+
+               pd->keystate = (List *) ExecPrepareExpr((Expr *) partkey->partexprs,
+                                                                                               estate);
+       }
+
+       next_expr = list_head(pd->keystate);
+       for (col = 0; col < partkey->partnatts; col++)
+       {
+               AttrNumber      attno = partkey->partattrs[col];
+
+               if (attno == 0)
+               {
+                       /* Expression key: evaluate the next prepared expression */
+                       if (next_expr == NULL)
+                               elog(ERROR, "wrong number of partition key expressions");
+                       values[col] = ExecEvalExprSwitchContext((ExprState *) lfirst(next_expr),
+                                                                                         GetPerTupleExprContext(estate),
+                                                                                                       &isnull[col],
+                                                                                                       NULL);
+                       next_expr = lnext(next_expr);
+               }
+               else
+               {
+                       /* Plain column key: read it straight from the tuple */
+                       values[col] = slot_getattr(slot, attno, &isnull[col]);
+               }
+       }
+
+       /* Every prepared expression must have been consumed */
+       if (next_expr != NULL)
+               elog(ERROR, "wrong number of partition key expressions");
+}
+
+/*
+ * get_partition_for_tuple
+ *             Finds a leaf partition for tuple contained in *slot
+ *
+ * pd is the array of dispatch entries, with pd[0] being the root partitioned
+ * table.  Starting there, the partition key of the tuple is extracted and
+ * looked up in the current table's bounds.  A negative entry in the table's
+ * indexes[] array names another dispatch entry (pd[-entry]) to descend into,
+ * while a non-negative entry is the result.
+ *
+ * Returned value is the sequence number of the leaf partition thus found,
+ * or -1 if no leaf partition is found for the tuple.  *failed_at is set
+ * to the OID of the partitioned table whose partition was not found in
+ * the latter case.
+ *
+ * An error is raised if any column of a range partition key is null.  A
+ * null list partition key is routed to the null-accepting partition if one
+ * exists; otherwise no partition is found for it.
+ */
+int
+get_partition_for_tuple(PartitionDispatch * pd,
+                                               TupleTableSlot *slot,
+                                               EState *estate,
+                                               Oid *failed_at)
+{
+       PartitionDispatch parent;
+       Datum           values[PARTITION_MAX_KEYS];
+       bool            isnull[PARTITION_MAX_KEYS];
+       int                     cur_offset,
+                               cur_index;
+       int                     i;
+
+       /* start with the root partitioned table */
+       parent = pd[0];
+       while (true)
+       {
+               PartitionKey key = parent->key;
+               PartitionDesc partdesc = parent->partdesc;
+
+               /* Quick exit */
+               if (partdesc->nparts == 0)
+               {
+                       *failed_at = RelationGetRelid(parent->reldesc);
+                       return -1;
+               }
+
+               /* Extract partition key from tuple */
+               FormPartitionKeyDatum(parent, slot, estate, values, isnull);
+
+               if (key->strategy == PARTITION_STRATEGY_RANGE)
+               {
+                       /* Disallow nulls in the range partition key of the tuple */
+                       for (i = 0; i < key->partnatts; i++)
+                               if (isnull[i])
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+                                               errmsg("range partition key of row contains null")));
+               }
+
+               if (key->strategy == PARTITION_STRATEGY_LIST && isnull[0])
+               {
+                       /*
+                        * A null list key can only go to the null-accepting partition.
+                        * If there is none, report "not found" rather than handing the
+                        * null datum to the comparison function via the binary search.
+                        */
+                       if (partdesc->boundinfo->has_null)
+                               cur_index = partdesc->boundinfo->null_index;
+                       else
+                               cur_index = -1;
+               }
+               else
+               {
+                       /* Else bsearch in partdesc->boundinfo */
+                       bool            equal = false;
+
+                       cur_offset = partition_bound_bsearch(key, partdesc->boundinfo,
+                                                                                                values, false, &equal);
+                       switch (key->strategy)
+                       {
+                               case PARTITION_STRATEGY_LIST:
+                                       /* Only an exact match on a list value qualifies */
+                                       if (cur_offset >= 0 && equal)
+                                               cur_index = partdesc->boundinfo->indexes[cur_offset];
+                                       else
+                                               cur_index = -1;
+                                       break;
+
+                               case PARTITION_STRATEGY_RANGE:
+
+                                       /*
+                                        * Offset returned is such that the bound at offset is
+                                        * found to be less or equal with the tuple. So, the bound
+                                        * at offset+1 would be the upper bound.  (An offset of
+                                        * -1 therefore selects indexes[0].)
+                                        */
+                                       cur_index = partdesc->boundinfo->indexes[cur_offset + 1];
+                                       break;
+
+                               default:
+                                       elog(ERROR, "unexpected partition strategy: %d",
+                                                (int) key->strategy);
+                       }
+               }
+
+               /*
+                * cur_index < 0 means we failed to find a partition of this parent.
+                * cur_index >= 0 means we either found the leaf partition, or the
+                * next parent to find a partition of.
+                */
+               if (cur_index < 0)
+               {
+                       *failed_at = RelationGetRelid(parent->reldesc);
+                       return -1;
+               }
+               else if (parent->indexes[cur_index] < 0)
+                       parent = pd[-parent->indexes[cur_index]];
+               else
+                       break;
+       }
+
+       return parent->indexes[cur_index];
+}
+
+/*
+ * qsort_partition_list_value_cmp
+ *
+ * Sort comparator for list partition bound values.  a and b each point to a
+ * PartitionListValue pointer; arg is the PartitionKey, whose first support
+ * function and collation are used to compare the two values.
+ */
+static int32
+qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
+{
+       const PartitionListValue *lhs = *(const PartitionListValue **) a;
+       const PartitionListValue *rhs = *(const PartitionListValue **) b;
+       PartitionKey key = (PartitionKey) arg;
+       Datum           result;
+
+       result = FunctionCall2Coll(&key->partsupfunc[0],
+                                                          key->partcollation[0],
+                                                          lhs->value, rhs->value);
+
+       return DatumGetInt32(result);
+}
+
+/*
+ * make_one_range_bound
+ *
+ * Build a palloc'd PartitionRangeBound from a list of PartitionRangeDatum
+ * elements.  index is stored in the bound as given, and lower tells whether
+ * this is a lower bound.  An infinite datum is recorded as negative infinity
+ * in a lower bound and as positive infinity otherwise; a finite datum must
+ * be a non-null constant.  Kept as a function because several call sites
+ * need it.
+ */
+static PartitionRangeBound *
+make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
+{
+       PartitionRangeBound *result;
+       ListCell   *lc;
+       int                     pos = 0;
+
+       result = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
+       result->index = index;
+       result->lower = lower;
+       result->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
+       result->content = (RangeDatumContent *) palloc0(key->partnatts *
+                                                                                                  sizeof(RangeDatumContent));
+
+       foreach(lc, datums)
+       {
+               PartitionRangeDatum *rdatum = lfirst(lc);
+
+               if (rdatum->infinite)
+               {
+                       /* Unbounded: the direction follows the kind of bound */
+                       if (lower)
+                               result->content[pos] = RANGE_DATUM_NEG_INF;
+                       else
+                               result->content[pos] = RANGE_DATUM_POS_INF;
+               }
+               else
+               {
+                       Const      *con = (Const *) rdatum->value;
+
+                       result->content[pos] = RANGE_DATUM_FINITE;
+
+                       /* A finite bound value is never allowed to be null */
+                       if (con->constisnull)
+                               elog(ERROR, "invalid range bound datum");
+                       result->datums[pos] = con->constvalue;
+               }
+
+               pos++;
+       }
+
+       return result;
+}
+
+/*
+ * Sort comparator for range bounds gathered across all range partitions.
+ * a and b each point to a PartitionRangeBound pointer; arg is the
+ * PartitionKey.  The actual comparison is done by partition_rbound_cmp.
+ */
+static int32
+qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
+{
+       PartitionRangeBound *left = *(PartitionRangeBound *const *) a;
+       PartitionRangeBound *right = *(PartitionRangeBound *const *) b;
+
+       return partition_rbound_cmp((PartitionKey) arg,
+                                                               left->datums, left->content, left->lower,
+                                                               right);
+}
+
+/*
+ * partition_rbound_cmp
+ *
+ * Compare two range bounds: the first is given piecewise as datums1,
+ * content1 and lower1, the second as *b2.  Returns a negative value, zero,
+ * or a positive value according to whether the first bound is <, =, or >
+ * the second.
+ *
+ * Columns are compared left to right.  The first column at which either
+ * bound is infinite decides the result without calling the comparison
+ * function; otherwise the key's support function for that column is used,
+ * and the first column that compares unequal decides.
+ */
+static int32
+partition_rbound_cmp(PartitionKey key,
+                                        Datum *datums1, RangeDatumContent *content1, bool lower1,
+                                        PartitionRangeBound *b2)
+{
+       int32           cmpval = 0;             /* defined even if the loop never runs */
+       int                     i;
+       Datum      *datums2 = b2->datums;
+       RangeDatumContent *content2 = b2->content;
+       bool            lower2 = b2->lower;
+
+       for (i = 0; i < key->partnatts; i++)
+       {
+               /*
+                * First, handle cases involving infinity, which don't require
+                * invoking the comparison proc.
+                */
+               if (content1[i] != RANGE_DATUM_FINITE &&
+                       content2[i] != RANGE_DATUM_FINITE)
+               {
+                       /*
+                        * Both are infinity, so they are equal unless one is negative
+                        * infinity and other positive (or vice versa)
+                        */
+                       return content1[i] == content2[i] ? 0
+                               : (content1[i] < content2[i] ? -1 : 1);
+               }
+               else if (content1[i] != RANGE_DATUM_FINITE)
+                       return content1[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
+               else if (content2[i] != RANGE_DATUM_FINITE)
+                       return content2[i] == RANGE_DATUM_NEG_INF ? 1 : -1;
+
+               cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
+                                                                                                key->partcollation[i],
+                                                                                                datums1[i],
+                                                                                                datums2[i]));
+               if (cmpval != 0)
+                       break;
+       }
+
+       /*
+        * If the comparison is anything other than equal, we're done. If they
+        * compare equal though, we still have to consider whether the boundaries
+        * are inclusive or exclusive.  An upper bound sorts before a lower bound
+        * that has the same values.
+        */
+       if (cmpval == 0 && lower1 != lower2)
+               cmpval = lower1 ? 1 : -1;
+
+       return cmpval;
+}
+
+/*
+ * partition_rbound_datum_cmp
+ *
+ * Compare a range bound (given as rb_datums and rb_content) against the
+ * partition key of a tuple (tuple_datums).  The result is negative, zero,
+ * or positive according to whether the bound sorts before, equal to, or
+ * after the tuple's key.
+ *
+ * Columns are examined left to right.  An infinite bound column decides the
+ * result immediately; otherwise the first column that compares unequal
+ * decides.
+ */
+static int32
+partition_rbound_datum_cmp(PartitionKey key,
+                                                  Datum *rb_datums, RangeDatumContent *rb_content,
+                                                  Datum *tuple_datums)
+{
+       int32           result = -1;
+       int                     keyno;
+
+       for (keyno = 0; keyno < key->partnatts; keyno++)
+       {
+               /* No comparison function call is needed against infinity */
+               if (rb_content[keyno] != RANGE_DATUM_FINITE)
+               {
+                       if (rb_content[keyno] == RANGE_DATUM_NEG_INF)
+                               return -1;
+                       return 1;
+               }
+
+               result = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[keyno],
+                                                                                                key->partcollation[keyno],
+                                                                                                rb_datums[keyno],
+                                                                                                tuple_datums[keyno]));
+               if (result != 0)
+                       return result;
+       }
+
+       return result;
+}
+
+/*
+ * partition_bound_cmp
+ *
+ * Compare the bound stored at offset in boundinfo against *probe.  The
+ * result is negative, zero, or positive according to whether the stored
+ * bound sorts before, equal to, or after the probe.
+ *
+ * For list partitioning, *probe is read as a single Datum.  For range
+ * partitioning, *probe is a PartitionRangeBound if probe_is_bound is true,
+ * and otherwise an array of Datums holding a tuple's partition key.
+ */
+static int32
+partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo,
+                                       int offset, void *probe, bool probe_is_bound)
+{
+       Datum      *stored = boundinfo->datums[offset];
+       int32           result = -1;
+
+       switch (key->strategy)
+       {
+               case PARTITION_STRATEGY_LIST:
+                       {
+                               Datum           probe_value = *(Datum *) probe;
+
+                               result = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
+                                                                                                        key->partcollation[0],
+                                                                                                                stored[0],
+                                                                                                                probe_value));
+                       }
+                       break;
+
+               case PARTITION_STRATEGY_RANGE:
+                       {
+                               RangeDatumContent *stored_content = boundinfo->content[offset];
+
+                               if (!probe_is_bound)
+                               {
+                                       /* Probe is the partition key of a tuple */
+                                       result = partition_rbound_datum_cmp(key,
+                                                                                                               stored, stored_content,
+                                                                                                               (Datum *) probe);
+                               }
+                               else
+                               {
+                                       /*
+                                        * Tell the comparator whether the stored bound is a
+                                        * lower bound (a negative entry in indexes[] is taken
+                                        * to mean that), so that an equal-valued lower and
+                                        * upper bound are not regarded as equal.
+                                        */
+                                       bool            stored_is_lower = boundinfo->indexes[offset] < 0;
+
+                                       result = partition_rbound_cmp(key,
+                                                                                                 stored, stored_content,
+                                                                                                 stored_is_lower,
+                                                                                         (PartitionRangeBound *) probe);
+                               }
+                       }
+                       break;
+
+               default:
+                       elog(ERROR, "unexpected partition strategy: %d",
+                                (int) key->strategy);
+       }
+
+       return result;
+}
+
+/*
+ * Binary search on a collection of partition bounds. Returns greatest index
+ * of bound in array boundinfo->datums which is less or equal with *probe.
+ * If all bounds in the array are greater than *probe, -1 is returned.
+ *
+ * *probe could either be a partition bound or a Datum array representing
+ * the partition key of a tuple being routed; probe_is_bound tells which.
+ * We pass that down to the comparison function so that it can interpret the
+ * contents of *probe accordingly.
+ *
+ * *is_equal is set to whether the bound at the returned index is equal with
+ * *probe.  It is always written: it is false when -1 is returned, so callers
+ * need not initialize it beforehand.
+ */
+static int
+partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo,
+                                               void *probe, bool probe_is_bound, bool *is_equal)
+{
+       int                     lo,
+                               hi,
+                               mid;
+
+       /* Nothing has matched yet; covers the empty and "all greater" cases */
+       *is_equal = false;
+
+       lo = -1;
+       hi = boundinfo->ndatums - 1;
+       while (lo < hi)
+       {
+               int32           cmpval;
+
+               /* Round up so that the loop makes progress when hi == lo + 1 */
+               mid = (lo + hi + 1) / 2;
+               cmpval = partition_bound_cmp(key, boundinfo, mid, probe,
+                                                                        probe_is_bound);
+               if (cmpval <= 0)
+               {
+                       /* Bound at mid is <= probe: it is the best candidate so far */
+                       lo = mid;
+                       *is_equal = (cmpval == 0);
+               }
+               else
+                       hi = mid - 1;
+       }
+
+       return lo;
+}
index 8fabe6899f65796e9c4495c6d27dd0ffdc93f7be..724b41e64cdee99c6353f0e605ea10219b9dfbc9 100644 (file)
@@ -368,7 +368,7 @@ CreateConstraintEntry(const char *constraintName,
                 */
                recordDependencyOnSingleRelExpr(&conobject, conExpr, relId,
                                                                                DEPENDENCY_NORMAL,
-                                                                               DEPENDENCY_NORMAL);
+                                                                               DEPENDENCY_NORMAL, false);
        }
 
        /* Post creation hook for new constraint */
index c617abb223b07e2ee430e81861896213f6efdd36..f4afcd9aae14be9d13e5e12e949979896c6cb5fd 100644 (file)
@@ -201,7 +201,8 @@ analyze_rel(Oid relid, RangeVar *relation, int options,
         * locked the relation.
         */
        if (onerel->rd_rel->relkind == RELKIND_RELATION ||
-               onerel->rd_rel->relkind == RELKIND_MATVIEW)
+               onerel->rd_rel->relkind == RELKIND_MATVIEW ||
+               onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
        {
                /* Regular table, so we'll use the regular row acquisition function */
                acquirefunc = acquire_sample_rows;
@@ -1317,7 +1318,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 
                /* Check table type (MATVIEW can't happen, but might as well allow) */
                if (childrel->rd_rel->relkind == RELKIND_RELATION ||
-                       childrel->rd_rel->relkind == RELKIND_MATVIEW)
+                       childrel->rd_rel->relkind == RELKIND_MATVIEW ||
+                       childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
                {
                        /* Regular table, so use the regular row acquisition function */
                        acquirefunc = acquire_sample_rows;
index ec5d6f15659f590128f218f373d663ae02e65e2b..270be0af18e4a5b9e75202a306d70cd7a234d599 100644 (file)
@@ -161,6 +161,11 @@ typedef struct CopyStateData
        ExprState **defexprs;           /* array of default att expressions */
        bool            volatile_defexprs;              /* is any of defexprs volatile? */
        List       *range_table;
+       PartitionDispatch          *partition_dispatch_info;
+       int                                             num_dispatch;
+       int                                             num_partitions;
+       ResultRelInfo              *partitions;
+       TupleConversionMap        **partition_tupconv_maps;
 
        /*
         * These variables are used to reduce overhead in textual COPY FROM.
@@ -1397,6 +1402,71 @@ BeginCopy(ParseState *pstate,
                                        (errcode(ERRCODE_UNDEFINED_COLUMN),
                                         errmsg("table \"%s\" does not have OIDs",
                                                        RelationGetRelationName(cstate->rel))));
+
+               /*
+                * Initialize state for CopyFrom tuple routing.  Watch out for
+                * any foreign partitions.
+                */
+               if (is_from && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+               {
+                       PartitionDispatch *pd;
+                       List               *leaf_parts;
+                       ListCell           *cell;
+                       int                             i,
+                                                       num_parted,
+                                                       num_leaf_parts;
+                       ResultRelInfo  *leaf_part_rri;
+
+                       /* Get the tuple-routing information and lock partitions */
+                       pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
+                                                                                                 &num_parted, &leaf_parts);
+                       num_leaf_parts = list_length(leaf_parts);
+                       cstate->partition_dispatch_info = pd;
+                       cstate->num_dispatch = num_parted;
+                       cstate->num_partitions = num_leaf_parts;
+                       cstate->partitions = (ResultRelInfo *) palloc(num_leaf_parts *
+                                                                                                               sizeof(ResultRelInfo));
+                       cstate->partition_tupconv_maps = (TupleConversionMap **)
+                                               palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+
+                       leaf_part_rri = cstate->partitions;
+                       i = 0;
+                       foreach(cell, leaf_parts)
+                       {
+                               Relation        partrel;
+
+                               /*
+                                * We locked all the partitions above including the leaf
+                                * partitions.  Note that each of the relations in
+                                * cstate->partitions will be closed by CopyFrom() after
+                                * it's finished with its processing.
+                                */
+                               partrel = heap_open(lfirst_oid(cell), NoLock);
+
+                               /*
+                                * Verify result relation is a valid target for the current
+                                * operation.
+                                */
+                               CheckValidResultRel(partrel, CMD_INSERT);
+
+                               InitResultRelInfo(leaf_part_rri,
+                                                                 partrel,
+                                                                 1,     /* dummy */
+                                                                 false, /* no partition constraint check */
+                                                                 0);
+
+                               /* Open partition indices */
+                               ExecOpenIndices(leaf_part_rri, false);
+
+                               if (!equalTupleDescs(tupDesc, RelationGetDescr(partrel)))
+                                       cstate->partition_tupconv_maps[i] =
+                                                               convert_tuples_by_name(tupDesc,
+                                                                       RelationGetDescr(partrel),
+                                                                       gettext_noop("could not convert row type"));
+                               leaf_part_rri++;
+                               i++;
+                       }
+               }
        }
        else
        {
@@ -1751,6 +1821,12 @@ BeginCopyTo(ParseState *pstate,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                         errmsg("cannot copy from sequence \"%s\"",
                                                        RelationGetRelationName(rel))));
+               else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                        errmsg("cannot copy from partitioned table \"%s\"",
+                                                       RelationGetRelationName(rel)),
+                                        errhint("Try the COPY (SELECT ...) TO variant.")));
                else
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -2249,6 +2325,7 @@ CopyFrom(CopyState cstate)
        Datum      *values;
        bool       *nulls;
        ResultRelInfo *resultRelInfo;
+       ResultRelInfo *saved_resultRelInfo = NULL;
        EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
        ExprContext *econtext;
        TupleTableSlot *myslot;
@@ -2275,6 +2352,7 @@ CopyFrom(CopyState cstate)
         * only hint about them in the view case.)
         */
        if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+               cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
                !(cstate->rel->trigdesc &&
                  cstate->rel->trigdesc->trig_insert_instead_row))
        {
@@ -2385,6 +2463,7 @@ CopyFrom(CopyState cstate)
        InitResultRelInfo(resultRelInfo,
                                          cstate->rel,
                                          1,            /* dummy rangetable index */
+                                         true,         /* do load partition check expression */
                                          0);
 
        ExecOpenIndices(resultRelInfo, false);
@@ -2407,11 +2486,13 @@ CopyFrom(CopyState cstate)
         * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
         * expressions. Such triggers or expressions might query the table we're
         * inserting to, and act differently if the tuples that have already been
-        * processed and prepared for insertion are not there.
+        * processed and prepared for insertion are not there.  We also can't
+        * do it if the table is partitioned.
         */
        if ((resultRelInfo->ri_TrigDesc != NULL &&
                 (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
                  resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
+               cstate->partition_dispatch_info != NULL ||
                cstate->volatile_defexprs)
        {
                useHeapMultiInsert = false;
@@ -2488,6 +2569,59 @@ CopyFrom(CopyState cstate)
                slot = myslot;
                ExecStoreTuple(tuple, slot, InvalidBuffer, false);
 
+               /* Determine the partition to heap_insert the tuple into */
+               if (cstate->partition_dispatch_info)
+               {
+                       int             leaf_part_index;
+                       TupleConversionMap *map;
+
+                       /*
+                        * Away we go ... If we end up not finding a partition after all,
+                        * ExecFindPartition() does not return and errors out instead.
+                        * Otherwise, the returned value is to be used as an index into
+                        * arrays cstate->partitions[] and cstate->partition_tupconv_maps[]
+                        * that will get us the ResultRelInfo and TupleConversionMap for
+                        * the partition, respectively.
+                        */
+                       leaf_part_index = ExecFindPartition(resultRelInfo,
+                                                                                       cstate->partition_dispatch_info,
+                                                                                               slot,
+                                                                                               estate);
+                       Assert(leaf_part_index >= 0 &&
+                                  leaf_part_index < cstate->num_partitions);
+
+                       /*
+                        * Save the old ResultRelInfo and switch to the one corresponding
+                        * to the selected partition.
+                        */
+                       saved_resultRelInfo = resultRelInfo;
+                       resultRelInfo = cstate->partitions + leaf_part_index;
+
+                       /* We do not yet have a way to insert into a foreign partition */
+                       if (resultRelInfo->ri_FdwRoutine)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("cannot route inserted tuples to a foreign table")));
+
+                       /*
+                        * For ExecInsertIndexTuples() to work on the partition's indexes
+                        */
+                       estate->es_result_relation_info = resultRelInfo;
+
+                       /*
+                        * We might need to convert from the parent rowtype to the
+                        * partition rowtype.
+                        */
+                       map = cstate->partition_tupconv_maps[leaf_part_index];
+                       if (map)
+                       {
+                               tuple = do_convert_tuple(tuple, map);
+                               ExecStoreTuple(tuple, slot, InvalidBuffer, true);
+                       }
+
+                       tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+               }
+
                skip_tuple = false;
 
                /* BEFORE ROW INSERT Triggers */
@@ -2513,7 +2647,8 @@ CopyFrom(CopyState cstate)
                        else
                        {
                                /* Check the constraints of the tuple */
-                               if (cstate->rel->rd_att->constr)
+                               if (cstate->rel->rd_att->constr ||
+                                       resultRelInfo->ri_PartitionCheck)
                                        ExecConstraints(resultRelInfo, slot, estate);
 
                                if (useHeapMultiInsert)
@@ -2546,7 +2681,8 @@ CopyFrom(CopyState cstate)
                                        List       *recheckIndexes = NIL;
 
                                        /* OK, store the tuple and create index entries for it */
-                                       heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
+                                       heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
+                                                               hi_options, bistate);
 
                                        if (resultRelInfo->ri_NumIndices > 0)
                                                recheckIndexes = ExecInsertIndexTuples(slot,
@@ -2570,6 +2706,12 @@ CopyFrom(CopyState cstate)
                         * tuples inserted by an INSERT command.
                         */
                        processed++;
+
+                       if (saved_resultRelInfo)
+                       {
+                               resultRelInfo = saved_resultRelInfo;
+                               estate->es_result_relation_info = resultRelInfo;
+                       }
                }
        }
 
@@ -2607,6 +2749,32 @@ CopyFrom(CopyState cstate)
 
        ExecCloseIndices(resultRelInfo);
 
+       /* Close all the partitioned tables, leaf partitions, and their indices */
+       if (cstate->partition_dispatch_info)
+       {
+               int             i;
+
+               /*
+                * Remember cstate->partition_dispatch_info[0] corresponds to the root
+                * partitioned table, which we must not try to close, because it is
+                * the main target table of COPY that will be closed eventually by
+                * DoCopy().
+                */
+               for (i = 1; i < cstate->num_dispatch; i++)
+               {
+                       PartitionDispatch pd = cstate->partition_dispatch_info[i];
+
+                       heap_close(pd->reldesc, NoLock);
+               }
+               for (i = 0; i < cstate->num_partitions; i++)
+               {
+                       ResultRelInfo *resultRelInfo = cstate->partitions + i;
+
+                       ExecCloseIndices(resultRelInfo);
+                       heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+               }
+       }
+
        FreeExecutorState(estate);
 
        /*
index 5b4f6affcce20d4b3970feb4149f89cc2ed747fe..d6d52d99295bd20480b3e588c733a15d6c04d6b5 100644 (file)
@@ -112,7 +112,7 @@ create_ctas_internal(List *attrList, IntoClause *into)
         * Create the relation.  (This will error out if there's an existing view,
         * so we don't need more code to complain if "replace" is false.)
         */
-       intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL);
+       intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
 
        /*
         * If necessary, create a TOAST table for the target table.  Note that
index 85817c6530252dc0b2bc361a6342eb943f7e6678..eeb2b1fe80d2b68a5a8346f5dd797313b3008e09 100644 (file)
@@ -69,8 +69,6 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo,
                                  char *accessMethodName, Oid accessMethodId,
                                  bool amcanorder,
                                  bool isconstraint);
-static Oid GetIndexOpClass(List *opclass, Oid attrType,
-                               char *accessMethodName, Oid accessMethodId);
 static char *ChooseIndexName(const char *tabname, Oid namespaceId,
                                List *colnames, List *exclusionOpNames,
                                bool primary, bool isconstraint);
@@ -383,6 +381,11 @@ DefineIndex(Oid relationId,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                         errmsg("cannot create index on foreign table \"%s\"",
                                                        RelationGetRelationName(rel))));
+               else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                        errmsg("cannot create index on partitioned table \"%s\"",
+                                                       RelationGetRelationName(rel))));
                else
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -1145,10 +1148,10 @@ ComputeIndexAttrs(IndexInfo *indexInfo,
                /*
                 * Identify the opclass to use.
                 */
-               classOidP[attn] = GetIndexOpClass(attribute->opclass,
-                                                                                 atttype,
-                                                                                 accessMethodName,
-                                                                                 accessMethodId);
+               classOidP[attn] = ResolveOpClass(attribute->opclass,
+                                                                                atttype,
+                                                                                accessMethodName,
+                                                                                accessMethodId);
 
                /*
                 * Identify the exclusion operator, if any.
@@ -1255,10 +1258,13 @@ ComputeIndexAttrs(IndexInfo *indexInfo,
 
 /*
  * Resolve possibly-defaulted operator class specification
+ *
+ * Note: This is used to resolve operator class specifications in both index
+ * and partition key definitions.
  */
-static Oid
-GetIndexOpClass(List *opclass, Oid attrType,
-                               char *accessMethodName, Oid accessMethodId)
+Oid
+ResolveOpClass(List *opclass, Oid attrType,
+                          char *accessMethodName, Oid accessMethodId)
 {
        char       *schemaname;
        char       *opcname;
index a0c0d75977b0314d532a7ef92b335a84afc23602..9e62e00b8dc0341d0bc61dcab8e83f1774b4f4a4 100644 (file)
@@ -87,7 +87,7 @@ RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, Oid oldrelid,
                                                                 * check */
 
        /* Currently, we only allow plain tables to be locked */
-       if (relkind != RELKIND_RELATION)
+       if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
                                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                 errmsg("\"%s\" is not a table",
index 70e22c100001b776594e404bdcd3df7217716e38..6da3205c9e15d6039ee36f5db6b8c0bdcdcd9edb 100644 (file)
@@ -88,7 +88,7 @@ RangeVarCallbackForPolicy(const RangeVar *rv, Oid relid, Oid oldrelid,
                                                rv->relname)));
 
        /* Relation type MUST be a table. */
-       if (relkind != RELKIND_RELATION)
+       if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
                                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                 errmsg("\"%s\" is not a table", rv->relname)));
@@ -384,7 +384,8 @@ RemovePolicyById(Oid policy_id)
        relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;
 
        rel = heap_open(relid, AccessExclusiveLock);
-       if (rel->rd_rel->relkind != RELKIND_RELATION)
+       if (rel->rd_rel->relkind != RELKIND_RELATION &&
+               rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
                                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                 errmsg("\"%s\" is not a table",
index 5bd7e124c184d477912a0249466da2e6a7a4ec0e..2b0ae34830179d720a095f719963c464a35ed62f 100644 (file)
@@ -110,7 +110,8 @@ ExecSecLabelStmt(SecLabelStmt *stmt)
                                relation->rd_rel->relkind != RELKIND_VIEW &&
                                relation->rd_rel->relkind != RELKIND_MATVIEW &&
                                relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
-                               relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+                               relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+                               relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
                                ereport(ERROR,
                                                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                                 errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table",
index 7e37108b8d673d424a8473f21dde3a7f8031f717..d953b4408bddf3a80e7b2807d3d939c1b1db1430 100644 (file)
@@ -234,7 +234,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
        stmt->tablespacename = NULL;
        stmt->if_not_exists = seq->if_not_exists;
 
-       address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL);
+       address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
        seqoid = address.objectId;
        Assert(seqoid != InvalidOid);
 
@@ -1475,7 +1475,8 @@ process_owned_by(Relation seqrel, List *owned_by)
 
                /* Must be a regular or foreign table */
                if (!(tablerel->rd_rel->relkind == RELKIND_RELATION ||
-                         tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
+                         tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
+                         tablerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                         errmsg("referenced relation \"%s\" is not a table or foreign table",
index 6322fa75a76d12cce948779821fb4e3531995aa8..c77b216d4f775ecaed21db596af1d7db6436e5e8 100644 (file)
@@ -29,6 +29,7 @@
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
 #include "catalog/objectaccess.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_constraint.h"
@@ -65,6 +66,9 @@
 #include "nodes/parsenodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/planner.h"
+#include "optimizer/predtest.h"
+#include "optimizer/prep.h"
+#include "optimizer/var.h"
 #include "parser/parse_clause.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_collate.h"
@@ -162,6 +166,7 @@ typedef struct AlteredTableInfo
        Oid                     newTableSpace;  /* new tablespace; 0 means no change */
        bool            chgPersistence; /* T if SET LOGGED/UNLOGGED is used */
        char            newrelpersistence;              /* if above is true */
+       List       *partition_constraint; /* for attach partition validation */
        /* Objects to rebuild after completing ALTER TYPE operations */
        List       *changedConstraintOids;      /* OIDs of constraints to rebuild */
        List       *changedConstraintDefs;      /* string definitions of same */
@@ -252,6 +257,12 @@ static const struct dropmsgstrings dropmsgstringarray[] = {
                gettext_noop("foreign table \"%s\" does not exist, skipping"),
                gettext_noop("\"%s\" is not a foreign table"),
        gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")},
+       {RELKIND_PARTITIONED_TABLE,
+               ERRCODE_UNDEFINED_TABLE,
+               gettext_noop("table \"%s\" does not exist"),
+               gettext_noop("table \"%s\" does not exist, skipping"),
+               gettext_noop("\"%s\" is not a table"),
+       gettext_noop("Use DROP TABLE to remove a table.")},
        {'\0', 0, NULL, NULL, NULL, NULL}
 };
 
@@ -272,7 +283,8 @@ struct DropRelationCallbackState
 
 static void truncate_check_rel(Relation rel);
 static List *MergeAttributes(List *schema, List *supers, char relpersistence,
-                               List **supOids, List **supconstr, int *supOidCount);
+                               bool is_partition, List **supOids, List **supconstr,
+                               int *supOidCount);
 static bool MergeCheckConstraint(List *constraints, char *name, Node *expr);
 static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel);
 static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel);
@@ -339,7 +351,9 @@ static void add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid);
 static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid);
 static void ATPrepAddOids(List **wqueue, Relation rel, bool recurse,
                          AlterTableCmd *cmd, LOCKMODE lockmode);
+static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing);
 static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode);
+static void ATPrepSetNotNull(Relation rel, bool recurse, bool recursing);
 static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
                                 const char *colName, LOCKMODE lockmode);
 static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName,
@@ -433,6 +447,15 @@ static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid,
                                                                Oid oldRelOid, void *arg);
 static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
                                                                 Oid oldrelid, void *arg);
+static bool is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr);
+static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy);
+static void ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs,
+                                         List **partexprs, Oid *partopclass, Oid *partcollation);
+static void CreateInheritance(Relation child_rel, Relation parent_rel);
+static void RemoveInheritance(Relation child_rel, Relation parent_rel);
+static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel,
+                                               PartitionCmd *cmd);
+static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name);
 
 
 /* ----------------------------------------------------------------
@@ -455,7 +478,7 @@ static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
  */
 ObjectAddress
 DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
-                          ObjectAddress *typaddress)
+                          ObjectAddress *typaddress, const char *queryString)
 {
        char            relname[NAMEDATALEN];
        Oid                     namespaceId;
@@ -492,6 +515,14 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
                                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                                 errmsg("ON COMMIT can only be used on temporary tables")));
 
+       if (stmt->partspec != NULL)
+       {
+               if (relkind != RELKIND_RELATION)
+                       elog(ERROR, "unexpected relkind: %d", (int) relkind);
+
+               relkind = RELKIND_PARTITIONED_TABLE;
+       }
+
        /*
         * Look up the namespace in which we are supposed to create the relation,
         * check we have permission to create there, lock it against concurrent
@@ -578,6 +609,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
         */
        schema = MergeAttributes(schema, stmt->inhRelations,
                                                         stmt->relation->relpersistence,
+                                                        stmt->partbound != NULL,
                                                         &inheritOids, &old_constraints, &parentOidCount);
 
        /*
@@ -588,17 +620,33 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
        descriptor = BuildDescForRelation(schema);
 
        /*
-        * Notice that we allow OIDs here only for plain tables, even though some
-        * other relkinds can support them.  This is necessary because the
-        * default_with_oids GUC must apply only to plain tables and not any other
-        * relkind; doing otherwise would break existing pg_dump files.  We could
-        * allow explicit "WITH OIDS" while not allowing default_with_oids to
-        * affect other relkinds, but it would complicate interpretOidsOption().
+        * Notice that we allow OIDs here only for plain tables and partitioned
+        * tables, even though some other relkinds can support them.  This is
+        * necessary because the default_with_oids GUC must apply only to plain
+        * tables and not any other relkind; doing otherwise would break existing
+        * pg_dump files.  We could allow explicit "WITH OIDS" while not allowing
+        * default_with_oids to affect other relkinds, but it would complicate
+        * interpretOidsOption().
         */
        localHasOids = interpretOidsOption(stmt->options,
-                                                                          (relkind == RELKIND_RELATION));
+                                                                          (relkind == RELKIND_RELATION ||
+                                                                               relkind == RELKIND_PARTITIONED_TABLE));
        descriptor->tdhasoid = (localHasOids || parentOidCount > 0);
 
+       if (stmt->partbound)
+       {
+               /* If the parent has OIDs, partitions must have them too. */
+               if (parentOidCount > 0 && !localHasOids)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                        errmsg("cannot create table without OIDs as partition of table with OIDs")));
+               /* If the parent doesn't, partitions must not have them. */
+               if (parentOidCount == 0 && localHasOids)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                        errmsg("cannot create table with OIDs as partition of table without OIDs")));
+       }
+
        /*
         * Find columns with default values and prepare for insertion of the
         * defaults.  Pre-cooked (that is, inherited) defaults go into a list of
@@ -697,6 +745,110 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
         */
        rel = relation_open(relationId, AccessExclusiveLock);
 
+       /* Process and store partition bound, if any. */
+       if (stmt->partbound)
+       {
+               Node       *bound;
+               ParseState *pstate;
+               Oid                     parentId = linitial_oid(inheritOids);
+               Relation        parent;
+
+               /* Already have strong enough lock on the parent */
+               parent = heap_open(parentId, NoLock);
+
+               /*
+                * We are going to try to validate the partition bound specification
+                * against the partition key of the parent, so it had better have one.
+                */
+               if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("\"%s\" is not partitioned",
+                                                       RelationGetRelationName(parent))));
+
+               /* Transform the bound values */
+               pstate = make_parsestate(NULL);
+               pstate->p_sourcetext = queryString;
+               bound = transformPartitionBound(pstate, parent, stmt->partbound);
+
+               /*
+                * Check first that the new partition's bound is valid and does not
+                * overlap with any existing partition of the parent.  Note that
+                * check_new_partition_bound() does not return on error.
+                */
+               check_new_partition_bound(relname, parent, bound);
+               heap_close(parent, NoLock);
+
+               /* Update the pg_class entry. */
+               StorePartitionBound(rel, bound);
+
+               /*
+                * The code that follows may also update the pg_class tuple to update
+                * relnumchecks, so bump up the command counter to avoid the "already
+                * updated by self" error.
+                */
+               CommandCounterIncrement();
+       }
+
+       /*
+        * Process the partitioning specification (if any) and store the
+        * partition key information into the catalog.
+        */
+       if (stmt->partspec)
+       {
+               char                    strategy;
+               int                             partnatts,
+                                               i;
+               AttrNumber              partattrs[PARTITION_MAX_KEYS];
+               Oid                             partopclass[PARTITION_MAX_KEYS];
+               Oid                             partcollation[PARTITION_MAX_KEYS];
+               List               *partexprs = NIL;
+               List               *cmds = NIL;
+
+               /*
+                * We need to transform the raw parsetrees corresponding to partition
+                * expressions into executable expression trees.  Like column defaults
+                * and CHECK constraints, we could not have done the transformation
+                * earlier.
+                */
+               stmt->partspec = transformPartitionSpec(rel, stmt->partspec,
+                                                                                               &strategy);
+               ComputePartitionAttrs(rel, stmt->partspec->partParams,
+                                                         partattrs, &partexprs, partopclass,
+                                                         partcollation);
+
+               partnatts = list_length(stmt->partspec->partParams);
+               StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs,
+                                                 partopclass, partcollation);
+
+               /* Force key columns to be NOT NULL when using range partitioning */
+               if (strategy == PARTITION_STRATEGY_RANGE)
+               {
+                       for (i = 0; i < partnatts; i++)
+                       {
+                               AttrNumber      partattno = partattrs[i];
+                               Form_pg_attribute attform;
+
+                               /* attno 0 is not a table column; never index attrs[] with it */
+                               attform = (partattno != 0) ? descriptor->attrs[partattno - 1] : NULL;
+
+                               if (attform != NULL && !attform->attnotnull)
+                               {
+                                       /* Add a subcommand to make this one NOT NULL */
+                                       AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+                                       cmd->subtype = AT_SetNotNull;
+                                       cmd->name = pstrdup(NameStr(attform->attname));
+                                       cmds = lappend(cmds, cmd);
+                               }
+                       }
+
+                       /* No partitions can exist yet, but ATPrepSetNotNull() insists on recurse */
+                       if (cmds != NIL)
+                               AlterTableInternal(RelationGetRelid(rel), cmds, true);
+               }
+       }
+
        /*
         * Now add any newly specified column default values and CHECK constraints
         * to the new relation.  These are passed to us in the form of raw
@@ -927,6 +1079,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
        HeapTuple       tuple;
        struct DropRelationCallbackState *state;
        char            relkind;
+       char            expected_relkind;
        Form_pg_class classform;
        LOCKMODE        heap_lockmode;
 
@@ -955,7 +1108,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
                return;                                 /* concurrently dropped, so nothing to do */
        classform = (Form_pg_class) GETSTRUCT(tuple);
 
-       if (classform->relkind != relkind)
+       /*
+        * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
+        * but RemoveRelations() can only pass one relkind for a given relation.
+        * It chooses RELKIND_RELATION for both regular and partitioned tables.
+        * That means we must be careful before giving the wrong type error when
+        * the relation is RELKIND_PARTITIONED_TABLE.
+        */
+       if (classform->relkind == RELKIND_PARTITIONED_TABLE)
+               expected_relkind = RELKIND_RELATION;
+       else
+               expected_relkind = classform->relkind;
+
+       if (relkind != expected_relkind)
                DropErrorMsgWrongType(rel->relname, classform->relkind, relkind);
 
        /* Allow DROP to either table owner or schema owner */
@@ -1054,6 +1219,10 @@ ExecuteTruncate(TruncateStmt *stmt)
                                relids = lappend_oid(relids, childrelid);
                        }
                }
+               else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                        errmsg("must truncate child tables too")));
        }
 
        /*
@@ -1153,6 +1322,7 @@ ExecuteTruncate(TruncateStmt *stmt)
                InitResultRelInfo(resultRelInfo,
                                                  rel,
                                                  0,    /* dummy rangetable index */
+                                                 false,
                                                  0);
                resultRelInfo++;
        }
@@ -1293,7 +1463,8 @@ truncate_check_rel(Relation rel)
        AclResult       aclresult;
 
        /* Only allow truncate on regular tables */
-       if (rel->rd_rel->relkind != RELKIND_RELATION)
+       if (rel->rd_rel->relkind != RELKIND_RELATION &&
+               rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
                                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                 errmsg("\"%s\" is not a table",
@@ -1359,6 +1530,7 @@ storage_name(char c)
  *             of ColumnDef's.) It is destructively changed.
  * 'supers' is a list of names (as RangeVar nodes) of parent relations.
  * 'relpersistence' is a persistence type of the table.
+ * 'is_partition' tells if the table is a partition
  *
  * Output arguments:
  * 'supOids' receives a list of the OIDs of the parent relations.
@@ -1410,7 +1582,8 @@ storage_name(char c)
  */
 static List *
 MergeAttributes(List *schema, List *supers, char relpersistence,
-                               List **supOids, List **supconstr, int *supOidCount)
+                               bool is_partition, List **supOids, List **supconstr,
+                               int *supOidCount)
 {
        ListCell   *entry;
        List       *inhSchema = NIL;
@@ -1420,6 +1593,7 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
        bool            have_bogus_defaults = false;
        int                     child_attno;
        static Node bogus_marker = {0};         /* marks conflicting defaults */
+       List       *saved_schema = NIL;
 
        /*
         * Check for and reject tables with too many columns. We perform this
@@ -1438,6 +1612,17 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                                 errmsg("tables can have at most %d columns",
                                                MaxHeapAttributeNumber)));
 
+       /*
+        * In case of a partition, there are no new column definitions, only
+        * dummy ColumnDefs created for column constraints.  We merge them
+        * with the constraints inherited from the parent.
+        */
+       if (is_partition)
+       {
+               saved_schema = schema;
+               schema = NIL;
+       }
+
        /*
         * Check for duplicate names in the explicit list of attributes.
         *
@@ -1518,11 +1703,35 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                 * on the parent table, which might otherwise be attempting to clear
                 * the parent's relhassubclass field, if its previous children were
                 * recently dropped.
+                *
+                * If the child table is a partition, then we instead grab an exclusive
+                * lock on the parent because its partition descriptor will be changed
+                * by addition of the new partition.
+                */
+               if (!is_partition)
+                       relation = heap_openrv(parent, ShareUpdateExclusiveLock);
+               else
+                       relation = heap_openrv(parent, AccessExclusiveLock);
+
+               /*
+                * We do not allow partitioned tables and partitions to participate
+                * in regular inheritance.
                 */
-               relation = heap_openrv(parent, ShareUpdateExclusiveLock);
+               if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+                       !is_partition)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                        errmsg("cannot inherit from partitioned table \"%s\"",
+                                                       parent->relname)));
+               if (relation->rd_rel->relispartition && !is_partition)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                        errmsg("cannot inherit from partition \"%s\"",
+                                                       parent->relname)));
 
                if (relation->rd_rel->relkind != RELKIND_RELATION &&
-                       relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+                       relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+                       relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                         errmsg("inherited relation \"%s\" is not a table or foreign table",
@@ -1532,7 +1741,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                        relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
-                                        errmsg("cannot inherit from temporary relation \"%s\"",
+                                        errmsg(!is_partition
+                                                       ? "cannot inherit from temporary relation \"%s\""
+                                                       : "cannot create a permanent relation as partition of temporary relation \"%s\"",
                                                        parent->relname)));
 
                /* If existing rel is temp, it must belong to this session */
@@ -1540,7 +1751,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                        !relation->rd_islocaltemp)
                        ereport(ERROR,
                                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
-                                        errmsg("cannot inherit from temporary relation of another session")));
+                                        errmsg(!is_partition
+                                                       ? "cannot inherit from temporary relation of another session"
+                                                       : "cannot create as partition of temporary relation of another session")));
 
                /*
                 * We should have an UNDER permission flag for this, but for now,
@@ -1777,9 +1990,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                pfree(newattno);
 
                /*
-                * Close the parent rel, but keep our ShareUpdateExclusiveLock on it
-                * until xact commit.  That will prevent someone else from deleting or
-                * ALTERing the parent before the child is committed.
+                * Close the parent rel, but keep our lock on it until xact commit.
+                * That will prevent someone else from deleting or ALTERing the parent
+                * before the child is committed.
                 */
                heap_close(relation, NoLock);
        }
@@ -1787,7 +2000,8 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
        /*
         * If we had no inherited attributes, the result schema is just the
         * explicitly declared columns.  Otherwise, we need to merge the declared
-        * columns into the inherited schema list.
+        * columns into the inherited schema list.  Note, however, that a
+        * partition never has any explicitly declared columns.
         */
        if (inhSchema != NIL)
        {
@@ -1815,6 +2029,12 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                                Oid                     defcollid,
                                                        newcollid;
 
+                               /*
+                                * Partitions have only one parent, so conflict should never
+                                * occur
+                                */
+                               Assert(!is_partition);
+
                                /*
                                 * Yes, try to merge the two column definitions. They must
                                 * have the same type, typmod, and collation.
@@ -1896,6 +2116,56 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                                                        MaxHeapAttributeNumber)));
        }
 
+       /*
+        * Now that we have the column definition list for a partition, we can
+        * check whether the columns referenced in column option specifications
+        * actually exist.  Also, we merge the options into the corresponding
+        * column definitions.
+        */
+       if (is_partition && list_length(saved_schema) > 0)
+       {
+               schema = list_concat(schema, saved_schema);
+
+               foreach(entry, schema)
+               {
+                       ColumnDef  *coldef = lfirst(entry);
+                       ListCell   *rest = lnext(entry);
+                       ListCell   *prev = entry;
+
+                       /*
+                        * Partition column option that does not belong to a column from
+                        * the parent.  This works because the columns from the parent
+                        * come first in the list (see above).
+                        */
+                       if (coldef->typeName == NULL)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_UNDEFINED_COLUMN),
+                                        errmsg("column \"%s\" does not exist",
+                                                       coldef->colname)));
+                       while (rest != NULL)
+                       {
+                               ColumnDef  *restdef = lfirst(rest);
+                               ListCell   *next = lnext(rest);         /* need to save it in case
+                                                                                                        * we delete it */
+
+                               if (strcmp(coldef->colname, restdef->colname) == 0)
+                               {
+                                       /*
+                                        * merge the column options into the column from the
+                                        * parent
+                                        */
+                                       coldef->is_not_null = restdef->is_not_null;
+                                       coldef->raw_default = restdef->raw_default;
+                                       coldef->cooked_default = restdef->cooked_default;
+                                       coldef->constraints = restdef->constraints;
+                                       list_delete_cell(schema, rest, prev);
+                               }
+                               prev = rest;
+                               rest = next;
+                       }
+               }
+       }
+
        /*
         * If we found any conflicting parent default values, check to make sure
         * they were overridden by the child.
@@ -2166,7 +2436,8 @@ renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing)
                relkind != RELKIND_MATVIEW &&
                relkind != RELKIND_COMPOSITE_TYPE &&
                relkind != RELKIND_INDEX &&
-               relkind != RELKIND_FOREIGN_TABLE)
+               relkind != RELKIND_FOREIGN_TABLE &&
+               relkind != RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
                                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                                 errmsg("\"%s\" is not a table, view, materialized view, composite type, index, or foreign table",
@@ -3057,6 +3328,11 @@ AlterTableGetLockLevel(List *cmds)
                                cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def);
                                break;
 
+                       case AT_AttachPartition:
+                       case AT_DetachPartition:
+                               cmd_lockmode = AccessExclusiveLock;
+                               break;
+
                        default:                        /* oops */
                                elog(ERROR, "unrecognized alter table type: %d",
                                         (int) cmd->subtype);
@@ -3168,12 +3444,14 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
                        break;
                case AT_DropNotNull:    /* ALTER COLUMN DROP NOT NULL */
                        ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+                       ATPrepDropNotNull(rel, recurse, recursing);
                        ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode);
                        /* No command-specific prep needed */
                        pass = AT_PASS_DROP;
                        break;
                case AT_SetNotNull:             /* ALTER COLUMN SET NOT NULL */
                        ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+                       ATPrepSetNotNull(rel, recurse, recursing);
                        ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode);
                        /* No command-specific prep needed */
                        pass = AT_PASS_ADD_CONSTR;
@@ -3374,6 +3652,12 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
                        /* No command-specific prep needed */
                        pass = AT_PASS_MISC;
                        break;
+               case AT_AttachPartition:
+               case AT_DetachPartition:
+                       ATSimplePermissions(rel, ATT_TABLE);
+                       /* No command-specific prep needed */
+                       pass = AT_PASS_MISC;
+                       break;
                default:                                /* oops */
                        elog(ERROR, "unrecognized alter table type: %d",
                                 (int) cmd->subtype);
@@ -3444,7 +3728,14 @@ ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode)
        {
                AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
 
-               if (tab->relkind == RELKIND_RELATION ||
+               /*
+                * If the table is the source table of an ATTACH PARTITION command,
+                * we did not modify anything about it that would change its
+                * toasting requirement, so there is no need to check.
+                */
+               if (((tab->relkind == RELKIND_RELATION ||
+                         tab->relkind == RELKIND_PARTITIONED_TABLE) &&
+                         tab->partition_constraint == NIL) ||
                        tab->relkind == RELKIND_MATVIEW)
                        AlterTableCreateToastTable(tab->relid, (Datum) 0, lockmode);
        }
@@ -3693,6 +3984,12 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
                case AT_GenericOptions:
                        ATExecGenericOptions(rel, (List *) cmd->def);
                        break;
+               case AT_AttachPartition:
+                       ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def);
+                       break;
+               case AT_DetachPartition:
+                       ATExecDetachPartition(rel, ((PartitionCmd *) cmd->def)->name);
+                       break;
                default:                                /* oops */
                        elog(ERROR, "unrecognized alter table type: %d",
                                 (int) cmd->subtype);
@@ -3878,7 +4175,8 @@ ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode)
                         * Test the current data within the table against new constraints
                         * generated by ALTER TABLE commands, but don't rebuild data.
                         */
-                       if (tab->constraints != NIL || tab->new_notnull)
+                       if (tab->constraints != NIL || tab->new_notnull ||
+                               tab->partition_constraint != NIL)
                                ATRewriteTable(tab, InvalidOid, lockmode);
 
                        /*
@@ -3958,6 +4256,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
        CommandId       mycid;
        BulkInsertState bistate;
        int                     hi_options;
+       List       *partqualstate = NIL;
 
        /*
         * Open the relation(s).  We have surely already locked the existing
@@ -4022,6 +4321,15 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
                }
        }
 
+       /* Build expression execution states for partition check quals */
+       if (tab->partition_constraint)
+       {
+               needscan = true;
+               partqualstate = (List *)
+                                               ExecPrepareExpr((Expr *) tab->partition_constraint,
+                                                                               estate);
+       }
+
        foreach(l, tab->newvals)
        {
                NewColumnValue *ex = lfirst(l);
@@ -4211,6 +4519,11 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
                                }
                        }
 
+                       if (partqualstate && !ExecQual(partqualstate, econtext, true))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_CHECK_VIOLATION),
+                                                errmsg("partition constraint is violated by some row")));
+
                        /* Write the tuple out to the new relation */
                        if (newrel)
                                heap_insert(newrel, tuple, mycid, hi_options, bistate);
@@ -4291,6 +4604,7 @@ ATSimplePermissions(Relation rel, int allowed_targets)
        switch (rel->rd_rel->relkind)
        {
                case RELKIND_RELATION:
+               case RELKIND_PARTITIONED_TABLE:
                        actual_target = ATT_TABLE;
                        break;
                case RELKIND_VIEW:
@@ -4407,7 +4721,8 @@ ATSimpleRecursion(List **wqueue, Relation rel,
         */
        if (recurse &&
                (rel->rd_rel->relkind == RELKIND_RELATION ||
-                rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
+                rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
+                rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
        {
                Oid                     relid = RelationGetRelid(rel);
                ListCell   *child;
@@ -4527,7 +4842,8 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation,
                att = rel->rd_att->attrs[pg_depend->objsubid - 1];
 
                if (rel->rd_rel->relkind == RELKIND_RELATION ||
-                       rel->rd_rel->relkind == RELKIND_MATVIEW)
+                       rel->rd_rel->relkind == RELKIND_MATVIEW ||
+                       rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
                {
                        if (origTypeName)
                                ereport(ERROR,
@@ -4728,6 +5044,11 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
        if (recursing)
                ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
 
+       if (rel->rd_rel->relispartition && !recursing)
+               ereport(ERROR,
+                               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                errmsg("cannot add column to a partition")));
+
        attrdesc = heap_open(AttributeRelationId, RowExclusiveLock);
 
        /*
@@ -5174,6 +5495,20 @@ ATPrepAddOids(List **wqueue, Relation rel, bool recurse, AlterTableCmd *cmd, LOC
  * Return the address of the modified column.  If the column was already
  * nullable, InvalidObjectAddress is returned.
  */
+
+static void
+ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
+{
+       /*
+        * Reject a non-recursive DROP NOT NULL on a partitioned table: as with
+        * check constraints, it must be dropped from the child tables too.
+        */
+       if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+               !recurse && !recursing)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                                errmsg("constraint must be dropped from child tables too")));
+}
 static ObjectAddress
 ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
 {
@@ -5249,6 +5584,45 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
 
        list_free(indexoidlist);
 
+       /* If rel is a partition, NOT NULL can't be dropped if the parent has it */
+       if (rel->rd_rel->relispartition)
+       {
+               Oid                     parentId = get_partition_parent(RelationGetRelid(rel));
+               Relation        parent = heap_open(parentId, AccessShareLock);
+               TupleDesc       tupDesc = RelationGetDescr(parent);
+               AttrNumber      parent_attnum;
+
+               parent_attnum = get_attnum(parentId, colName);
+               if (tupDesc->attrs[parent_attnum - 1]->attnotnull)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                                        errmsg("column \"%s\" is marked NOT NULL in parent table",
+                                                       colName)));
+               heap_close(parent, AccessShareLock);
+       }
+
+       /*
+        * If the table is a range partitioned table, check that the column
+        * is not in the partition key.
+        */
+       if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       {
+               PartitionKey    key = RelationGetPartitionKey(rel);
+               int                             partnatts = get_partition_natts(key),
+                                               i;
+
+               for (i = 0; i < partnatts; i++)
+               {
+                       AttrNumber      partattnum = get_partition_col_attnum(key, i);
+
+                       if (partattnum == attnum)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                                                errmsg("column \"%s\" is in range partition key",
+                                                               colName)));
+               }
+       }
+
        /*
         * Okay, actually perform the catalog change ... if needed
         */
@@ -5281,6 +5655,21 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
  * Return the address of the modified column.  If the column was already NOT
  * NULL, InvalidObjectAddress is returned.
  */
+
+static void
+ATPrepSetNotNull(Relation rel, bool recurse, bool recursing)
+{
+       /*
+        * Reject a non-recursive SET NOT NULL on a partitioned table: as with
+        * check constraints, it must be added to the child tables too.
+        */
+       if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+               !recurse && !recursing)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                                errmsg("constraint must be added to child tables too")));
+}
+
 static ObjectAddress
 ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
                                 const char *colName, LOCKMODE lockmode)
@@ -5419,7 +5808,8 @@ ATPrepSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
        if (rel->rd_rel->relkind != RELKIND_RELATION &&
                rel->rd_rel->relkind != RELKIND_MATVIEW &&
                rel->rd_rel->relkind != RELKIND_INDEX &&
-               rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+               rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+               rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
                                (errcode(ERRCODE_WRONG_OBJECT_TYPE),