Implement table partitioning.
author Robert Haas <rhaas@postgresql.org>
Wed, 7 Dec 2016 18:17:43 +0000 (13:17 -0500)
committer Robert Haas <rhaas@postgresql.org>
Wed, 7 Dec 2016 18:17:55 +0000 (13:17 -0500)
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own.  The children are called
partitions and contain all of the actual data.  Each partition has an
implicit partitioning constraint.  Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed.  Partitions
can't have extra columns and may not allow nulls unless the parent
does.  Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.

Currently, tables can be range-partitioned or list-partitioned.  List
partitioning is limited to a single column, but range partitioning can
involve multiple columns.  A partitioning "column" can be an
expression.

Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations.  The tuple routing which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.

Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others.  Minor revisions by me.

85 files changed:
doc/src/sgml/catalogs.sgml
doc/src/sgml/ref/alter_table.sgml
doc/src/sgml/ref/create_foreign_table.sgml
doc/src/sgml/ref/create_table.sgml
src/backend/access/common/reloptions.c
src/backend/catalog/Makefile
src/backend/catalog/aclchk.c
src/backend/catalog/dependency.c
src/backend/catalog/heap.c
src/backend/catalog/index.c
src/backend/catalog/objectaddress.c
src/backend/catalog/partition.c [new file with mode: 0644]
src/backend/catalog/pg_constraint.c
src/backend/commands/analyze.c
src/backend/commands/copy.c
src/backend/commands/createas.c
src/backend/commands/indexcmds.c
src/backend/commands/lockcmds.c
src/backend/commands/policy.c
src/backend/commands/seclabel.c
src/backend/commands/sequence.c
src/backend/commands/tablecmds.c
src/backend/commands/trigger.c
src/backend/commands/typecmds.c
src/backend/commands/vacuum.c
src/backend/commands/view.c
src/backend/executor/execMain.c
src/backend/executor/nodeModifyTable.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/equalfuncs.c
src/backend/nodes/nodeFuncs.c
src/backend/nodes/outfuncs.c
src/backend/nodes/readfuncs.c
src/backend/optimizer/util/plancat.c
src/backend/parser/analyze.c
src/backend/parser/gram.y
src/backend/parser/parse_agg.c
src/backend/parser/parse_expr.c
src/backend/parser/parse_func.c
src/backend/parser/parse_utilcmd.c
src/backend/rewrite/rewriteDefine.c
src/backend/rewrite/rewriteHandler.c
src/backend/rewrite/rowsecurity.c
src/backend/tcop/utility.c
src/backend/utils/adt/ruleutils.c
src/backend/utils/cache/relcache.c
src/backend/utils/cache/syscache.c
src/bin/pg_dump/common.c
src/bin/pg_dump/pg_dump.c
src/bin/pg_dump/pg_dump.h
src/bin/psql/describe.c
src/bin/psql/tab-complete.c
src/include/catalog/catversion.h
src/include/catalog/dependency.h
src/include/catalog/heap.h
src/include/catalog/indexing.h
src/include/catalog/partition.h [new file with mode: 0644]
src/include/catalog/pg_class.h
src/include/catalog/pg_partitioned_table.h [new file with mode: 0644]
src/include/catalog/pg_proc.h
src/include/commands/defrem.h
src/include/commands/tablecmds.h
src/include/executor/executor.h
src/include/nodes/execnodes.h
src/include/nodes/nodes.h
src/include/nodes/parsenodes.h
src/include/parser/kwlist.h
src/include/parser/parse_node.h
src/include/parser/parse_utilcmd.h
src/include/pg_config_manual.h
src/include/utils/builtins.h
src/include/utils/rel.h
src/include/utils/syscache.h
src/test/regress/expected/alter_table.out
src/test/regress/expected/create_table.out
src/test/regress/expected/inherit.out
src/test/regress/expected/insert.out
src/test/regress/expected/sanity_check.out
src/test/regress/expected/update.out
src/test/regress/sql/alter_table.sql
src/test/regress/sql/create_table.sql
src/test/regress/sql/inherit.sql
src/test/regress/sql/insert.sql
src/test/regress/sql/update.sql
src/tools/pgindent/typedefs.list

index c4246dcd866e07be2304c0572b6f461feae20dcf..9d2e89523d65fc3999d81998d713c4052792b4b3 100644 (file)
       <entry>template data for procedural languages</entry>
      </row>
 
+     <row>
+      <entry><link linkend="catalog-pg-partitioned-table"><structname>pg_partitioned_table</structname></link></entry>
+      <entry>information about partition key of tables</entry>
+     </row>
+
      <row>
       <entry><link linkend="catalog-pg-policy"><structname>pg_policy</structname></link></entry>
       <entry>row-security policies</entry>
       <entry><type>char</type></entry>
       <entry></entry>
       <entry>
-       <literal>r</> = ordinary table, <literal>i</> = index,
+       <literal>r</> = ordinary table, <literal>P</> = partitioned table,
+       <literal>i</> = index,
        <literal>S</> = sequence, <literal>v</> = view,
        <literal>m</> = materialized view,
        <literal>c</> = composite type, <literal>t</> = TOAST table,
       </entry>
      </row>
 
+     <row>
+      <entry><structfield>relispartition</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>True if table is a partition</entry>
+     </row>
+
      <row>
       <entry><structfield>relfrozenxid</structfield></entry>
       <entry><type>xid</type></entry>
        Access-method-specific options, as <quote>keyword=value</> strings
       </entry>
      </row>
+
+     <row>
+      <entry><structfield>relpartbound</structfield></entry>
+      <entry><type>pg_node_tree</type></entry>
+      <entry></entry>
+      <entry>
+       If table is a partition (see <structfield>relispartition</structfield>),
+       internal representation of the partition bound
+      </entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
 
  </sect1>
 
+ <sect1 id="catalog-pg-partitioned-table">
+  <title><structname>pg_partitioned_table</structname></title>
+
+  <indexterm zone="catalog-pg-partitioned-table">
+   <primary>pg_partitioned_table</primary>
+  </indexterm>
+
+  <para>
+   The catalog <structname>pg_partitioned_table</structname> stores
+   information about how tables are partitioned.
+  </para>
+
+  <table>
+   <title><structname>pg_partitioned_table</> Columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+
+     <row>
+      <entry><structfield>partrelid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
+      <entry>The OID of the <structname>pg_class</> entry for this partitioned table</entry>
+     </row>
+
+     <row>
+      <entry><structfield>partstrat</structfield></entry>
+      <entry><type>char</type></entry>
+      <entry></entry>
+      <entry>
+       Partitioning strategy; <literal>l</> = list partitioned table,
+       <literal>r</> = range partitioned table
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>partnatts</structfield></entry>
+      <entry><type>int2</type></entry>
+      <entry></entry>
+      <entry>The number of columns in partition key</entry>
+     </row>
+
+     <row>
+      <entry><structfield>partattrs</structfield></entry>
+      <entry><type>int2vector</type></entry>
+      <entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
+      <entry>
+       This is an array of <structfield>partnatts</structfield> values that
+       indicate which table columns are part of the partition key.  For
+       example, a value of <literal>1 3</literal> would mean that the first
+       and the third table columns make up the partition key.  A zero in this
+       array indicates that the corresponding partition key column is an
+       expression, rather than a simple column reference.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>partclass</structfield></entry>
+      <entry><type>oidvector</type></entry>
+      <entry><literal><link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link>.oid</literal></entry>
+      <entry>
+       For each column in the partition key, this contains the OID of the
+       operator class to use.  See
+       <link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link> for details.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>partcollation</structfield></entry>
+      <entry><type>oidvector</type></entry>
+      <entry><literal><link linkend="catalog-pg-collation"><structname>pg_collation</structname></link>.oid</literal></entry>
+      <entry>
+       For each column in the partition key, this contains the OID of the
+       collation to use for partitioning.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>partexprs</structfield></entry>
+      <entry><type>pg_node_tree</type></entry>
+      <entry></entry>
+      <entry>
+       Expression trees (in <function>nodeToString()</function>
+       representation) for partition key columns that are not simple column
+       references.  This is a list with one element for each zero
+       entry in <structfield>partattrs</>.  Null if all partition key columns
+       are simple references.
+      </entry>
+     </row>
+
+    </tbody>
+   </tgroup>
+  </table>
+ </sect1>
+
  <sect1 id="catalog-pg-policy">
   <title><structname>pg_policy</structname></title>
 
index e48ccf21e4fbd35d1ae4ac75af0dcf25741bab3c..a6a43c4b302c58cd39f23ade8d97de3807724b45 100644 (file)
@@ -33,6 +33,10 @@ ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
     SET SCHEMA <replaceable class="PARAMETER">new_schema</replaceable>
 ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable> [ OWNED BY <replaceable class="PARAMETER">role_name</replaceable> [, ... ] ]
     SET TABLESPACE <replaceable class="PARAMETER">new_tablespace</replaceable> [ NOWAIT ]
+ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
+    ATTACH PARTITION <replaceable class="PARAMETER">partition_name</replaceable> FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable>
+ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
+    DETACH PARTITION <replaceable class="PARAMETER">partition_name</replaceable>
 
 <phrase>where <replaceable class="PARAMETER">action</replaceable> is one of:</phrase>
 
@@ -166,6 +170,12 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
       values or to reject null values.  You can only use <literal>SET
       NOT NULL</> when the column contains no null values.
      </para>
+
+     <para>
+      If this table is a partition, one cannot perform <literal>DROP NOT NULL</>
+      on a column if it is marked <literal>NOT NULL</literal> in the parent
+      table.
+     </para>
     </listitem>
    </varlistentry>
 
@@ -704,13 +714,63 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>ATTACH PARTITION</literal> <replaceable class="PARAMETER">partition_name</replaceable> FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable></term>
+    <listitem>
+     <para>
+      This form attaches an existing table (which might itself be partitioned)
+      as a partition of the target table using the same syntax for
+      <replaceable class="PARAMETER">partition_bound_spec</replaceable> as
+      <xref linkend="sql-createtable">.  The partition bound specification
+      must correspond to the partitioning strategy and partition key of the
+      target table.  The table to be attached must have all the same columns
+      as the target table and no more; moreover, the column types must also
+      match.  Also, it must have all the <literal>NOT NULL</literal> and
+      <literal>CHECK</literal> constraints of the target table.  Currently
+      <literal>UNIQUE</literal>, <literal>PRIMARY KEY</literal>, and
+      <literal>FOREIGN KEY</literal> constraints are not considered.
+      If any of the <literal>CHECK</literal> constraints of the table being
+      attached is marked <literal>NO INHERIT</literal>, the command will fail;
+      such a constraint must be recreated without the <literal>NO INHERIT</literal>
+      clause.
+     </para>
+
+     <para>
+      A full table scan is performed on the table being attached to check that
+      no existing row in the table violates the partition constraint.  It is
+      possible to avoid this scan by adding a valid <literal>CHECK</literal>
+      constraint to the table that would allow only the rows satisfying the
+      desired partition constraint before running this command.  It will be
+      determined using such a constraint that the table need not be scanned
+      to validate the partition constraint.  This does not work, however, if
+      any of the partition keys is an expression and the partition does not
+      accept <literal>NULL</literal> values.  If attaching a list partition
+      that will not accept <literal>NULL</literal> values, also add a
+      <literal>NOT NULL</literal> constraint to the partition key column,
+      unless it's an expression.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>DETACH PARTITION</literal> <replaceable class="PARAMETER">partition_name</replaceable></term>
+    <listitem>
+     <para>
+      This form detaches the specified partition of the target table.  The detached
+      partition continues to exist as a standalone table, but no longer has any
+      ties to the table from which it was detached.
+     </para>
+    </listitem>
+   </varlistentry>
+
   </variablelist>
   </para>
 
   <para>
    All the actions except <literal>RENAME</literal>,
-   <literal>SET TABLESPACE</literal> and <literal>SET SCHEMA</literal>
-   can be combined into
+   <literal>SET TABLESPACE</literal>, <literal>SET SCHEMA</literal>,
+   <literal>ATTACH PARTITION</literal>, and
+   <literal>DETACH PARTITION</literal> can be combined into
    a list of multiple alterations to apply in parallel.  For example, it
    is possible to add several columns and/or alter the type of several
    columns in a single command.  This is particularly useful with large
@@ -721,8 +781,9 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
    You must own the table to use <command>ALTER TABLE</>.
    To change the schema or tablespace of a table, you must also have
    <literal>CREATE</literal> privilege on the new schema or tablespace.
-   To add the table as a new child of a parent table, you must own the
-   parent table as well.
+   To add the table as a new child of a parent table, you must own the parent
+   table as well.  Also, to attach a table as a new partition of the table,
+   you must own the table being attached.
    To alter the owner, you must also be a direct or indirect member of the new
    owning role, and that role must have <literal>CREATE</literal> privilege on
    the table's schema.  (These restrictions enforce that altering the owner
@@ -938,6 +999,25 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><replaceable class="PARAMETER">partition_name</replaceable></term>
+      <listitem>
+       <para>
+        The name of the table to attach as a new partition or to detach from this table.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term><replaceable class="PARAMETER">partition_bound_spec</replaceable></term>
+      <listitem>
+       <para>
+        The partition bound specification for a new partition.  Refer to
+        <xref linkend="sql-createtable"> for more details on the syntax of the same.
+       </para>
+      </listitem>
+     </varlistentry>
+
     </variablelist>
  </refsect1>
 
@@ -977,6 +1057,11 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
     but does not require a table rewrite.
    </para>
 
+   <para>
+    Similarly, when attaching a new partition it may be scanned to verify that
+    existing rows meet the partition constraint.
+   </para>
+
    <para>
     The main reason for providing the option to specify multiple changes
     in a single <command>ALTER TABLE</> is that multiple table scans or
@@ -1047,6 +1132,9 @@ ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable>
     COLUMN</literal> (i.e., <command>ALTER TABLE ONLY ... DROP
     COLUMN</command>) never removes any descendant columns, but
     instead marks them as independently defined rather than inherited.
+    A nonrecursive <literal>DROP COLUMN</literal> command will fail for a
+    partitioned table, because all partitions of a table must have the same
+    columns as the partitioning root.
    </para>
 
    <para>
@@ -1233,6 +1321,27 @@ ALTER TABLE distributors DROP CONSTRAINT distributors_pkey,
     ADD CONSTRAINT distributors_pkey PRIMARY KEY USING INDEX dist_id_temp_idx;
 </programlisting></para>
 
+  <para>
+   Attach a partition to a range partitioned table:
+<programlisting>
+ALTER TABLE measurement
+    ATTACH PARTITION measurement_y2016m07 FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
+</programlisting></para>
+
+  <para>
+   Attach a partition to a list partitioned table:
+<programlisting>
+ALTER TABLE cities
+    ATTACH PARTITION cities_west FOR VALUES IN ('Los Angeles', 'San Francisco');
+</programlisting></para>
+
+  <para>
+   Detach a partition from a partitioned table:
+<programlisting>
+ALTER TABLE measurement
+    DETACH PARTITION measurement_y2015m12;
+</programlisting></para>
+
  </refsect1>
 
  <refsect1>
index 413b033cb57fe0b149bc6768fad4e7810ae35f45..5d0dcf567b5d12dabb823aadb53b0da54d0fe9bc 100644 (file)
@@ -27,6 +27,15 @@ CREATE FOREIGN TABLE [ IF NOT EXISTS ] <replaceable class="PARAMETER">table_name
   SERVER <replaceable class="parameter">server_name</replaceable>
 [ OPTIONS ( <replaceable class="PARAMETER">option</replaceable> '<replaceable class="PARAMETER">value</replaceable>' [, ... ] ) ]
 
+CREATE FOREIGN TABLE [ IF NOT EXISTS ] <replaceable class="PARAMETER">table_name</replaceable>
+  PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable> [ (
+  { <replaceable class="PARAMETER">column_name</replaceable> WITH OPTIONS [ <replaceable class="PARAMETER">column_constraint</replaceable> [ ... ] ]
+    | <replaceable>table_constraint</replaceable> }
+    [, ... ]
+) ] FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable>
+  SERVER <replaceable class="parameter">server_name</replaceable>
+[ OPTIONS ( <replaceable class="PARAMETER">option</replaceable> '<replaceable class="PARAMETER">value</replaceable>' [, ... ] ) ]
+
 <phrase>where <replaceable class="PARAMETER">column_constraint</replaceable> is:</phrase>
 
 [ CONSTRAINT <replaceable class="PARAMETER">constraint_name</replaceable> ]
@@ -67,6 +76,12 @@ CHECK ( <replaceable class="PARAMETER">expression</replaceable> ) [ NO INHERIT ]
    name as any existing data type in the same schema.
   </para>
 
+  <para>
+   If the <literal>PARTITION OF</literal> clause is specified, then the table
+   is created as a partition of <literal>parent_table</literal> with the
+   specified bounds.
+  </para>
+
   <para>
    To be able to create a foreign table, you must have <literal>USAGE</literal>
    privilege on the foreign server, as well as <literal>USAGE</literal>
@@ -314,6 +329,17 @@ CREATE FOREIGN TABLE films (
 SERVER film_server;
 </programlisting></para>
 
+  <para>
+   Create foreign table <structname>measurement_y2016m07</>, which will be
+   accessed through the server <structname>server_07</>, as a partition
+   of the range partitioned table <structname>measurement</>:
+
+<programlisting>
+CREATE FOREIGN TABLE measurement_y2016m07
+    PARTITION OF measurement FOR VALUES FROM ('2016-07-01') TO ('2016-08-01')
+    SERVER server_07;
+</programlisting></para>
+
  </refsect1>
 
  <refsect1 id="SQL-CREATEFOREIGNTABLE-compatibility">
index bf2ad64d66e3a40011a06a9904bce0a70aae09d6..8bf8af302b595ab111492b29678d5cdd75300cd1 100644 (file)
@@ -28,6 +28,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
     [, ... ]
 ] )
 [ INHERITS ( <replaceable>parent_table</replaceable> [, ... ] ) ]
+[ PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ... ] ) ]
 [ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
 [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
 [ TABLESPACE <replaceable class="PARAMETER">tablespace_name</replaceable> ]
@@ -38,6 +39,18 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
     | <replaceable>table_constraint</replaceable> }
     [, ... ]
 ) ]
+[ PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ... ] ) ]
+[ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
+[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
+[ TABLESPACE <replaceable class="PARAMETER">tablespace_name</replaceable> ]
+
+CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] <replaceable class="PARAMETER">table_name</replaceable>
+    PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable> [ (
+  { <replaceable class="PARAMETER">column_name</replaceable> [ <replaceable class="PARAMETER">column_constraint</replaceable> [ ... ] ]
+    | <replaceable>table_constraint</replaceable> }
+    [, ... ]
+) ] FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable>
+[ PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ... ] ) ]
 [ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
 [ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
 [ TABLESPACE <replaceable class="PARAMETER">tablespace_name</replaceable> ]
@@ -70,6 +83,11 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
 
 { INCLUDING | EXCLUDING } { DEFAULTS | CONSTRAINTS | INDEXES | STORAGE | COMMENTS | ALL }
 
+<phrase>and <replaceable class="PARAMETER">partition_bound_spec</replaceable> is:</phrase>
+
+{ IN ( <replaceable class="PARAMETER">expression</replaceable> [, ...] ) |
+  FROM ( { <replaceable class="PARAMETER">expression</replaceable> | UNBOUNDED } [, ...] ) TO ( { <replaceable class="PARAMETER">expression</replaceable> | UNBOUNDED } [, ...] ) }
+
 <phrase><replaceable class="PARAMETER">index_parameters</replaceable> in <literal>UNIQUE</literal>, <literal>PRIMARY KEY</literal>, and <literal>EXCLUDE</literal> constraints are:</phrase>
 
 [ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) ]
@@ -229,6 +247,51 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable></literal></term>
+    <listitem>
+     <para>
+      Creates the table as a <firstterm>partition</firstterm> of the specified
+      parent table.
+     </para>
+
+     <para>
+      The partition bound specification must correspond to the partitioning
+      method and partition key of the parent table, and must not overlap with
+      any existing partition of that parent.
+     </para>
+
+     <para>
+      A partition cannot have columns other than those inherited from the
+      parent.  That includes the <structfield>oid</> column, which can be
+      specified using the <literal>WITH (OIDS)</literal> clause.
+      Defaults and constraints can optionally be specified for each of the
+      inherited columns.  One can also specify table constraints in addition
+      to those inherited from the parent.  If a check constraint with the name
+      matching one of the parent's constraints is specified, it is merged with
+      the latter, provided the specified condition is the same.
+     </para>
+
+     <para>
+      Rows inserted into a partitioned table will be automatically routed to
+      the correct partition.  If no suitable partition exists, an error will
+      occur.
+     </para>
+
+     <para>
+      A partition must have the same column names and types as the table of
+      which it is a partition.  Therefore, modifications to the column names
+      or types of the partitioned table will automatically propagate to all
+      children, as will operations such as TRUNCATE which normally affect a
+      table and all of its inheritance children.  It is also possible to
+      TRUNCATE a partition individually, just as for an inheritance child.
+      Note that dropping a partition with <literal>DROP TABLE</literal>
+      requires taking an <literal>ACCESS EXCLUSIVE</literal> lock on the
+      parent table.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><replaceable class="PARAMETER">column_name</replaceable></term>
     <listitem>
@@ -313,6 +376,46 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ...] ) </literal></term>
+    <listitem>
+     <para>
+      The optional <literal>PARTITION BY</literal> clause specifies a strategy
+      of partitioning the table.  The table thus created is called a
+      <firstterm>partitioned</firstterm> table.  The parenthesized list of
+      columns or expressions forms the <firstterm>partition key</firstterm>
+      for the table.  When using range partitioning, the partition key can
+      include multiple columns or expressions, but for list partitioning, the
+      partition key must consist of a single column or expression.  If no
+      btree operator class is specified when creating a partitioned table,
+      the default btree operator class for the datatype will be used.  If
+      there is none, an error will be reported.
+     </para>
+
+     <para>
+      A partitioned table is divided into sub-tables (called partitions),
+      which are created using separate <literal>CREATE TABLE</> commands.
+      The partitioned table is itself empty.  A data row inserted into the
+      table is routed to a partition based on the value of columns or
+      expressions in the partition key.  If no existing partition matches
+      the values in the new row, an error will be reported.
+     </para>
+
+     <para>
+      Partitioned tables do not support <literal>UNIQUE</literal>,
+      <literal>PRIMARY KEY</literal>, <literal>EXCLUDE</literal>, or
+      <literal>FOREIGN KEY</literal> constraints; however, you can define
+      these constraints on individual partitions.
+     </para>
+
+     <para>
+      When using range partitioning, a <literal>NOT NULL</literal> constraint
+      is added to each non-expression column in the partition key.
+     </para>
+
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>LIKE <replaceable>source_table</replaceable> [ <replaceable>like_option</replaceable> ... ]</literal></term>
     <listitem>
@@ -1368,6 +1471,57 @@ CREATE TABLE employees OF employee_type (
     PRIMARY KEY (name),
     salary WITH OPTIONS DEFAULT 1000
 );
+</programlisting></para>
+
+  <para>
+   Create a range partitioned table:
+<programlisting>
+CREATE TABLE measurement (
+    city_id         int not null,
+    logdate         date not null,
+    peaktemp        int,
+    unitsales       int
+) PARTITION BY RANGE (logdate);
+</programlisting></para>
+
+  <para>
+   Create a list partitioned table:
+<programlisting>
+CREATE TABLE cities (
+    city_id      int not null,
+    name         text not null,
+    population   int
+) PARTITION BY LIST (initcap(name));
+</programlisting></para>
+
+  <para>
+   Create partition of a range partitioned table:
+<programlisting>
+CREATE TABLE measurement_y2016m07
+    PARTITION OF measurement (
+    unitsales WITH OPTIONS DEFAULT 0
+) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
+</programlisting></para>
+
+  <para>
+   Create partition of a list partitioned table:
+<programlisting>
+CREATE TABLE cities_west
+    PARTITION OF cities (
+    CONSTRAINT city_id_nonzero CHECK (city_id != 0)
+) FOR VALUES IN ('Los Angeles', 'San Francisco');
+</programlisting></para>
+
+  <para>
+   Create partition of a list partitioned table that is itself further
+   partitioned and then add a partition to it:
+<programlisting>
+CREATE TABLE cities_west
+    PARTITION OF cities (
+    CONSTRAINT city_id_nonzero CHECK (city_id != 0)
+) FOR VALUES IN ('Los Angeles', 'San Francisco') PARTITION BY RANGE (population);
+
+CREATE TABLE cities_west_10000_to_100000
+    PARTITION OF cities_west FOR VALUES FROM (10000) TO (100000);
 </programlisting></para>
  </refsect1>
 
index 83a97b06ab85bdfaa04c82bf2479948a2cc68849..34018cac7c89a729877f3f286d7cfc8d392a9bd3 100644 (file)
@@ -930,6 +930,7 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
        case RELKIND_RELATION:
        case RELKIND_TOASTVALUE:
        case RELKIND_MATVIEW:
+       case RELKIND_PARTITIONED_TABLE:
            options = heap_reloptions(classForm->relkind, datum, false);
            break;
        case RELKIND_VIEW:
@@ -1381,6 +1382,7 @@ heap_reloptions(char relkind, Datum reloptions, bool validate)
            return (bytea *) rdopts;
        case RELKIND_RELATION:
        case RELKIND_MATVIEW:
+       case RELKIND_PARTITIONED_TABLE:
            return default_reloptions(reloptions, validate, RELOPT_KIND_HEAP);
        default:
            /* other relkinds are not supported */
index 1ce761004979d2c5eb56021ed200fae5a863a22d..2d5ac09bece554bdd9dee157e6dc1111dc2c67df 100644 (file)
@@ -11,7 +11,7 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \
-       objectaccess.o objectaddress.o pg_aggregate.o pg_collation.o \
+       objectaccess.o objectaddress.o partition.o pg_aggregate.o pg_collation.o \
        pg_constraint.o pg_conversion.o \
        pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \
        pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \
@@ -41,7 +41,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
    pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
    pg_foreign_table.h pg_policy.h pg_replication_origin.h \
    pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \
-   pg_collation.h pg_range.h pg_transform.h \
+   pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \
    toasting.h indexing.h \
     )
 
index c0df6710d1d110efbf767c82e12f15c793dadff3..3086021432ab3abed6660669ceff3415740319fa 100644 (file)
@@ -768,6 +768,8 @@ objectsInSchemaToOids(GrantObjectType objtype, List *nspnames)
                objects = list_concat(objects, objs);
                objs = getRelationsInNamespace(namespaceId, RELKIND_FOREIGN_TABLE);
                objects = list_concat(objects, objs);
+               objs = getRelationsInNamespace(namespaceId, RELKIND_PARTITIONED_TABLE);
+               objects = list_concat(objects, objs);
                break;
            case ACL_OBJECT_SEQUENCE:
                objs = getRelationsInNamespace(namespaceId, RELKIND_SEQUENCE);
index b697e88ef0910a4d6c828f0a2981e83562178812..0cdd1c5c6cb05a6122cdd49f5205b968e6dcdba7 100644 (file)
@@ -1352,7 +1352,8 @@ void
 recordDependencyOnSingleRelExpr(const ObjectAddress *depender,
                                Node *expr, Oid relId,
                                DependencyType behavior,
-                               DependencyType self_behavior)
+                               DependencyType self_behavior,
+                               bool ignore_self)
 {
    find_expr_references_context context;
    RangeTblEntry rte;
@@ -1407,9 +1408,10 @@ recordDependencyOnSingleRelExpr(const ObjectAddress *depender,
        context.addrs->numrefs = outrefs;
 
        /* Record the self-dependencies */
-       recordMultipleDependencies(depender,
-                                  self_addrs->refs, self_addrs->numrefs,
-                                  self_behavior);
+       if (!ignore_self)
+           recordMultipleDependencies(depender,
+                                      self_addrs->refs, self_addrs->numrefs,
+                                      self_behavior);
 
        free_object_addresses(self_addrs);
    }
index 0b804e7ac6094e68570b081146379fe92f679c55..7f5bad0b5da322536d57a3047bcc4aa8c92630df 100644 (file)
@@ -41,6 +41,7 @@
 #include "catalog/heap.h"
 #include "catalog/index.h"
 #include "catalog/objectaccess.h"
+#include "catalog/partition.h"
 #include "catalog/pg_attrdef.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_constraint.h"
@@ -48,6 +49,8 @@
 #include "catalog/pg_foreign_table.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_partitioned_table.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_type.h"
@@ -808,6 +811,7 @@ InsertPgClassTuple(Relation pg_class_desc,
    values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass);
    values[Anum_pg_class_relispopulated - 1] = BoolGetDatum(rd_rel->relispopulated);
    values[Anum_pg_class_relreplident - 1] = CharGetDatum(rd_rel->relreplident);
+   values[Anum_pg_class_relispartition - 1] = BoolGetDatum(rd_rel->relispartition);
    values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid);
    values[Anum_pg_class_relminmxid - 1] = MultiXactIdGetDatum(rd_rel->relminmxid);
    if (relacl != (Datum) 0)
@@ -819,6 +823,9 @@ InsertPgClassTuple(Relation pg_class_desc,
    else
        nulls[Anum_pg_class_reloptions - 1] = true;
 
+   /* relpartbound is set by updating this tuple, if necessary */
+   nulls[Anum_pg_class_relpartbound - 1] = true;
+
    tup = heap_form_tuple(RelationGetDescr(pg_class_desc), values, nulls);
 
    /*
@@ -924,6 +931,9 @@ AddNewRelationTuple(Relation pg_class_desc,
    new_rel_reltup->reltype = new_type_oid;
    new_rel_reltup->reloftype = reloftype;
 
+   /* relispartition is always set by updating this tuple later */
+   new_rel_reltup->relispartition = false;
+
    new_rel_desc->rd_att->tdtypeid = new_type_oid;
 
    /* Now build and insert the tuple */
@@ -1104,7 +1114,8 @@ heap_create_with_catalog(const char *relname,
        if (IsBinaryUpgrade &&
            (relkind == RELKIND_RELATION || relkind == RELKIND_SEQUENCE ||
             relkind == RELKIND_VIEW || relkind == RELKIND_MATVIEW ||
-            relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE))
+            relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE ||
+            relkind == RELKIND_PARTITIONED_TABLE))
        {
            if (!OidIsValid(binary_upgrade_next_heap_pg_class_oid))
                ereport(ERROR,
@@ -1138,6 +1149,7 @@ heap_create_with_catalog(const char *relname,
            case RELKIND_VIEW:
            case RELKIND_MATVIEW:
            case RELKIND_FOREIGN_TABLE:
+           case RELKIND_PARTITIONED_TABLE:
                relacl = get_user_default_acl(ACL_OBJECT_RELATION, ownerid,
                                              relnamespace);
                break;
@@ -1182,7 +1194,8 @@ heap_create_with_catalog(const char *relname,
                              relkind == RELKIND_VIEW ||
                              relkind == RELKIND_MATVIEW ||
                              relkind == RELKIND_FOREIGN_TABLE ||
-                             relkind == RELKIND_COMPOSITE_TYPE))
+                             relkind == RELKIND_COMPOSITE_TYPE ||
+                             relkind == RELKIND_PARTITIONED_TABLE))
        new_array_oid = AssignTypeArrayOid();
 
    /*
@@ -1349,7 +1362,9 @@ heap_create_with_catalog(const char *relname,
    if (relpersistence == RELPERSISTENCE_UNLOGGED)
    {
        Assert(relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW ||
-              relkind == RELKIND_TOASTVALUE);
+              relkind == RELKIND_TOASTVALUE ||
+              relkind == RELKIND_PARTITIONED_TABLE);
+
        heap_create_init_fork(new_rel_desc);
    }
 
@@ -1754,12 +1769,29 @@ void
 heap_drop_with_catalog(Oid relid)
 {
    Relation    rel;
+   Oid         parentOid;
+   Relation    parent = NULL;
 
    /*
     * Open and lock the relation.
     */
    rel = relation_open(relid, AccessExclusiveLock);
 
+   /*
+    * If the relation is a partition, we must grab exclusive lock on its
+    * parent because we need to update its partition descriptor. We must
+    * take a table lock strong enough to prevent all queries on the parent
+    * from proceeding until we commit and send out a shared-cache-inval
+    * notice that will make them update their partition descriptor.
+    * Sometimes, doing this is cycles spent uselessly, especially if the
+    * parent will be dropped as part of the same command anyway.
+    */
+   if (rel->rd_rel->relispartition)
+   {
+       parentOid = get_partition_parent(relid);
+       parent = heap_open(parentOid, AccessExclusiveLock);
+   }
+
    /*
     * There can no longer be anyone *else* touching the relation, but we
     * might still have open queries or cursors, or pending trigger events, in
@@ -1795,6 +1827,12 @@ heap_drop_with_catalog(Oid relid)
        heap_close(rel, RowExclusiveLock);
    }
 
+   /*
+    * If a partitioned table, delete the pg_partitioned_table tuple.
+    */
+   if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       RemovePartitionKeyByRelId(relid);
+
    /*
     * Schedule unlinking of the relation's physical files at commit.
     */
@@ -1845,6 +1883,12 @@ heap_drop_with_catalog(Oid relid)
     * delete relation tuple
     */
    DeleteRelationTuple(relid);
+
+   if (parent)
+   {
+       CacheInvalidateRelcache(parent);
+       heap_close(parent, NoLock);     /* keep the lock */
+   }
 }
 
 
@@ -2027,6 +2071,17 @@ StoreRelCheck(Relation rel, char *ccname, Node *expr,
    else
        attNos = NULL;
 
+   /*
+    * Partitioned tables do not contain any rows themselves, so a NO INHERIT
+    * constraint makes no sense.
+    */
+   if (is_no_inherit &&
+       rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                errmsg("cannot add NO INHERIT constraint to partitioned table \"%s\"",
+                        RelationGetRelationName(rel))));
+
    /*
     * Create the Check Constraint
     */
@@ -2440,8 +2495,11 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr,
             * definition) then interpret addition of a local constraint as a
             * legal merge.  This allows ALTER ADD CONSTRAINT on parent and
             * child tables to be given in either order with same end state.
+            * However if the relation is a partition, all inherited
+            * constraints are always non-local, including those that were
+            * merged.
             */
-           if (is_local && !con->conislocal)
+           if (is_local && !con->conislocal && !rel->rd_rel->relispartition)
                allow_merge = true;
 
            if (!found || !allow_merge)
@@ -2486,10 +2544,24 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr,
            tup = heap_copytuple(tup);
            con = (Form_pg_constraint) GETSTRUCT(tup);
 
-           if (is_local)
-               con->conislocal = true;
+           /*
+            * In case of partitions, an inherited constraint must be
+            * inherited only once since it cannot have multiple parents and
+            * it is never considered local.
+            */
+           if (rel->rd_rel->relispartition)
+           {
+               con->coninhcount = 1;
+               con->conislocal = false;
+           }
            else
-               con->coninhcount++;
+           {
+               if (is_local)
+                   con->conislocal = true;
+               else
+                   con->coninhcount++;
+           }
+
            if (is_no_inherit)
            {
                Assert(is_local);
@@ -3013,3 +3085,187 @@ insert_ordered_unique_oid(List *list, Oid datum)
    lappend_cell_oid(list, prev, datum);
    return list;
 }
+
+/*
+ * StorePartitionKey
+ *     Store rel's partition key in the pg_partitioned_table catalog
+ *
+ * Builds and inserts a pg_partitioned_table row from the given strategy,
+ * key attribute numbers, key expressions (NIL if there are none), operator
+ * classes and collations, where partattrs, partopclass and partcollation
+ * each have partnatts members.  It then records rel's dependencies on the
+ * operator classes, the collations and anything referenced by the key
+ * expressions, and finally invalidates rel's relcache entry.
+ */
+void
+StorePartitionKey(Relation rel,
+                 char strategy,
+                 int16 partnatts,
+                 AttrNumber *partattrs,
+                 List *partexprs,
+                 Oid *partopclass,
+                 Oid *partcollation)
+{
+   int         i;
+   int2vector *partattrs_vec;
+   oidvector  *partopclass_vec;
+   oidvector  *partcollation_vec;
+   Datum       partexprDatum;
+   Relation    pg_partitioned_table;
+   HeapTuple   tuple;
+   Datum       values[Natts_pg_partitioned_table];
+   bool        nulls[Natts_pg_partitioned_table];
+   ObjectAddress   myself;
+   ObjectAddress   referenced;
+
+   Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+   /*
+    * No syscache lookup is done here: the tuple is built from scratch
+    * below, and a looked-up tuple would only have to be released again.
+    */
+
+   /* Copy the partition attribute numbers, opclass OIDs into arrays */
+   partattrs_vec = buildint2vector(partattrs, partnatts);
+   partopclass_vec = buildoidvector(partopclass, partnatts);
+   partcollation_vec = buildoidvector(partcollation, partnatts);
+
+   /* Convert the expressions (if any) to a text datum */
+   if (partexprs)
+   {
+       char       *exprString;
+
+       exprString = nodeToString(partexprs);
+       partexprDatum = CStringGetTextDatum(exprString);
+       pfree(exprString);
+   }
+   else
+       partexprDatum = (Datum) 0;
+
+   pg_partitioned_table = heap_open(PartitionedRelationId, RowExclusiveLock);
+
+   MemSet(nulls, false, sizeof(nulls));
+
+   /* Only this can ever be NULL */
+   if (!partexprDatum)
+       nulls[Anum_pg_partitioned_table_partexprs - 1] = true;
+
+   values[Anum_pg_partitioned_table_partrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel));
+   values[Anum_pg_partitioned_table_partstrat - 1] = CharGetDatum(strategy);
+   values[Anum_pg_partitioned_table_partnatts - 1] = Int16GetDatum(partnatts);
+   values[Anum_pg_partitioned_table_partattrs - 1] =  PointerGetDatum(partattrs_vec);
+   values[Anum_pg_partitioned_table_partclass - 1] = PointerGetDatum(partopclass_vec);
+   values[Anum_pg_partitioned_table_partcollation - 1] = PointerGetDatum(partcollation_vec);
+   values[Anum_pg_partitioned_table_partexprs - 1] = partexprDatum;
+
+   tuple = heap_form_tuple(RelationGetDescr(pg_partitioned_table), values, nulls);
+
+   simple_heap_insert(pg_partitioned_table, tuple);
+
+   /* Update the indexes on pg_partitioned_table */
+   CatalogUpdateIndexes(pg_partitioned_table, tuple);
+   heap_close(pg_partitioned_table, RowExclusiveLock);
+
+   /* Mark this relation as dependent on a few things as follows */
+   myself.classId = RelationRelationId;
+   myself.objectId = RelationGetRelid(rel);
+   myself.objectSubId = 0;
+
+   /* Operator class and collation per key column */
+   for (i = 0; i < partnatts; i++)
+   {
+       referenced.classId = OperatorClassRelationId;
+       referenced.objectId = partopclass[i];
+       referenced.objectSubId = 0;
+
+       recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+       /*
+        * partcollation[i] may be InvalidOid (presumably for key columns of
+        * non-collatable types -- confirm against the caller); recording a
+        * dependency then would reference a nonexistent object, so skip it.
+        */
+       if (OidIsValid(partcollation[i]))
+       {
+           referenced.classId = CollationRelationId;
+           referenced.objectId = partcollation[i];
+           referenced.objectSubId = 0;
+
+           recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+       }
+   }
+
+   /*
+    * Anything mentioned in the expressions.  We must ignore the column
+    * references, which will depend on the table itself; there is no
+    * separate partition key object.
+    */
+   if (partexprs)
+       recordDependencyOnSingleRelExpr(&myself,
+                                       (Node *) partexprs,
+                                       RelationGetRelid(rel),
+                                       DEPENDENCY_NORMAL,
+                                       DEPENDENCY_AUTO, true);
+
+   /*
+    * We must invalidate the relcache so that the next
+    * CommandCounterIncrement() will cause the same to be rebuilt using the
+    * information in just created catalog entry.
+    */
+   CacheInvalidateRelcache(rel);
+}
+
+/*
+ *  RemovePartitionKeyByRelId
+ *     Delete the pg_partitioned_table row that belongs to relation relid
+ *
+ * Raises an error if no such row can be found through the syscache.
+ */
+void
+RemovePartitionKeyByRelId(Oid relid)
+{
+   Relation    partkeyRel = heap_open(PartitionedRelationId, RowExclusiveLock);
+   HeapTuple   keytup = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid));
+
+   if (!HeapTupleIsValid(keytup))
+       elog(ERROR, "cache lookup failed for partition key of relation %u",
+            relid);
+
+   /* Remove the row, then give back our syscache reference to it */
+   simple_heap_delete(partkeyRel, &keytup->t_self);
+   ReleaseSysCache(keytup);
+
+   heap_close(partkeyRel, RowExclusiveLock);
+}
+
+/*
+ * StorePartitionBound
+ *     Update pg_class tuple of rel to store the partition bound and set
+ *     relispartition to true
+ *
+ * bound is serialized with nodeToString() and stored as a text datum in
+ * relpartbound.  In assert-enabled builds we verify that rel is not already
+ * marked as a partition and has no bound stored yet.
+ */
+void
+StorePartitionBound(Relation rel, Node *bound)
+{
+   Relation    classRel;
+   HeapTuple   tuple,
+               newtuple;
+   Datum   new_val[Natts_pg_class];
+   bool    new_null[Natts_pg_class],
+           new_repl[Natts_pg_class];
+
+   /* Update pg_class tuple */
+   classRel = heap_open(RelationRelationId, RowExclusiveLock);
+   tuple = SearchSysCacheCopy1(RELOID,
+                               ObjectIdGetDatum(RelationGetRelid(rel)));
+
+   /* The tuple is dereferenced below, so fail cleanly if it is missing */
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "cache lookup failed for relation %u",
+            RelationGetRelid(rel));
+
+#ifdef USE_ASSERT_CHECKING
+   {
+       Form_pg_class   classForm;
+       bool    isnull;
+
+       classForm = (Form_pg_class) GETSTRUCT(tuple);
+       Assert(!classForm->relispartition);
+       (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound,
+                              &isnull);
+       Assert(isnull);
+   }
+#endif
+
+   /* Fill in relpartbound value; only that column is marked for replacement */
+   memset(new_val, 0, sizeof(new_val));
+   memset(new_null, false, sizeof(new_null));
+   memset(new_repl, false, sizeof(new_repl));
+   new_val[Anum_pg_class_relpartbound - 1] = CStringGetTextDatum(nodeToString(bound));
+   new_null[Anum_pg_class_relpartbound - 1] = false;
+   new_repl[Anum_pg_class_relpartbound - 1] = true;
+   newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+                                new_val, new_null, new_repl);
+   /* Also set the flag */
+   ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = true;
+   simple_heap_update(classRel, &newtuple->t_self, newtuple);
+   CatalogUpdateIndexes(classRel, newtuple);
+   heap_freetuple(newtuple);
+   heap_close(classRel, RowExclusiveLock);
+}
index 08b646d8f33cbdedb34eebc30ad452731ca6b377..08b0989112bc9902548980b703fa84eff9e16901 100644 (file)
@@ -1043,7 +1043,7 @@ index_create(Relation heapRelation,
                                          (Node *) indexInfo->ii_Expressions,
                                            heapRelationId,
                                            DEPENDENCY_NORMAL,
-                                           DEPENDENCY_AUTO);
+                                           DEPENDENCY_AUTO, false);
        }
 
        /* Store dependencies on anything mentioned in predicate */
@@ -1053,7 +1053,7 @@ index_create(Relation heapRelation,
                                            (Node *) indexInfo->ii_Predicate,
                                            heapRelationId,
                                            DEPENDENCY_NORMAL,
-                                           DEPENDENCY_AUTO);
+                                           DEPENDENCY_AUTO, false);
        }
    }
    else
index d531d17cdbcb0c37c4a8c7a0cd992147b877c4f7..bb4b080b007915a1e4def685dfd54f0983d8c08b 100644 (file)
@@ -1204,7 +1204,8 @@ get_relation_by_qualified_name(ObjectType objtype, List *objname,
                                RelationGetRelationName(relation))));
            break;
        case OBJECT_TABLE:
-           if (relation->rd_rel->relkind != RELKIND_RELATION)
+           if (relation->rd_rel->relkind != RELKIND_RELATION &&
+               relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                         errmsg("\"%s\" is not a table",
@@ -3244,6 +3245,7 @@ getRelationDescription(StringInfo buffer, Oid relid)
    switch (relForm->relkind)
    {
        case RELKIND_RELATION:
+       case RELKIND_PARTITIONED_TABLE:
            appendStringInfo(buffer, _("table %s"),
                             relname);
            break;
@@ -3701,6 +3703,7 @@ getRelationTypeDescription(StringInfo buffer, Oid relid, int32 objectSubId)
    switch (relForm->relkind)
    {
        case RELKIND_RELATION:
+       case RELKIND_PARTITIONED_TABLE:
            appendStringInfoString(buffer, "table");
            break;
        case RELKIND_INDEX:
diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
new file mode 100644 (file)
index 0000000..6dab45f
--- /dev/null
@@ -0,0 +1,1917 @@
+/*-------------------------------------------------------------------------
+ *
+ * partition.c
+ *       Partitioning related data structures and functions.
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *       src/backend/catalog/partition.c
+ *
+ *-------------------------------------------------------------------------
+*/
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/nbtree.h"
+#include "access/sysattr.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaddress.h"
+#include "catalog/partition.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/parsenodes.h"
+#include "optimizer/clauses.h"
+#include "optimizer/planmain.h"
+#include "optimizer/var.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/lmgr.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/memutils.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/ruleutils.h"
+#include "utils/syscache.h"
+
+/*
+ * Information about bounds of a partitioned relation
+ *
+ * A list partition datum that is known to be NULL is never put into the
+ * datums array. Instead, it is tracked using has_null and null_index fields.
+ *
+ * In the case of range partitioning, ndatums will typically be far less than
+ * 2 * nparts, because a partition's upper bound and the next partition's lower
+ * bound are the same in most common cases, and we only store one of them.
+ *
+ * In the case of list partitioning, the indexes array stores one entry for
+ * every datum, which is the index of the partition that accepts a given datum.
+ * In case of range partitioning, it stores one entry per distinct range
+ * datum, which is the index of the partition for which a given datum
+ * is an upper bound.
+ */
+
+/*
+ * Ternary value to represent what's contained in a range bound datum.
+ *
+ * The structs below keep one of these per key column in a "content" array
+ * that runs parallel to the corresponding "datums" array.
+ */
+typedef enum RangeDatumContent
+{
+   RANGE_DATUM_FINITE = 0,     /* actual datum stored elsewhere */
+   RANGE_DATUM_NEG_INF,        /* negative infinity */
+   RANGE_DATUM_POS_INF         /* positive infinity */
+} RangeDatumContent;
+
+/*
+ * PartitionBoundInfoData
+ *     Bounds of the partitions of one partitioned table.
+ *
+ * A null list value is never stored in datums; it is tracked through
+ * has_null and null_index instead.
+ */
+typedef struct PartitionBoundInfoData
+{
+   char        strategy;       /* list or range bounds? */
+   int         ndatums;        /* Length of the datums following array */
+   Datum     **datums;         /* Array of datum-tuples with key->partnatts
+                                * datums each */
+   RangeDatumContent **content;/* what's contained in each range bound datum?
+                                * (see the above enum); NULL for list
+                                * partitioned tables */
+   int        *indexes;        /* Partition indexes; one entry per member of
+                                * the datums array (plus one if range
+                                * partitioned table) */
+   bool        has_null;       /* Is there a null-accepting partition? false
+                                * for range partitioned tables */
+   int         null_index;     /* Index of the null-accepting partition; -1
+                                * for range partitioned tables */
+} PartitionBoundInfoData;
+
+/*
+ * When qsort'ing partition bounds after reading from the catalog, each bound
+ * is represented with one of the following structs.
+ */
+
+/* One value coming from some (index'th) list partition */
+typedef struct PartitionListValue
+{
+   int         index;          /* index of the partition the value belongs
+                                * to */
+   Datum       value;          /* the (non-null) list value itself */
+} PartitionListValue;
+
+/* One bound of a range partition */
+typedef struct PartitionRangeBound
+{
+   int         index;          /* index of the partition this bound belongs
+                                * to */
+   Datum      *datums;         /* range bound datums */
+   RangeDatumContent *content; /* what's contained in each datum? */
+   bool        lower;          /* this is the lower (vs upper) bound */
+} PartitionRangeBound;
+
+static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
+                              void *arg);
+static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
+                          void *arg);
+
+static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
+static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
+static Oid get_partition_operator(PartitionKey key, int col,
+                      StrategyNumber strategy, bool *need_relabel);
+static List *generate_partition_qual(Relation rel, bool recurse);
+
+static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
+                    List *datums, bool lower);
+static int32 partition_rbound_cmp(PartitionKey key,
+                    Datum *datums1, RangeDatumContent *content1, bool lower1,
+                    PartitionRangeBound *b2);
+static int32 partition_rbound_datum_cmp(PartitionKey key,
+                          Datum *rb_datums, RangeDatumContent *rb_content,
+                          Datum *tuple_datums);
+
+static int32 partition_bound_cmp(PartitionKey key,
+                   PartitionBoundInfo boundinfo,
+                   int offset, void *probe, bool probe_is_bound);
+static int partition_bound_bsearch(PartitionKey key,
+                       PartitionBoundInfo boundinfo,
+                       void *probe, bool probe_is_bound, bool *is_equal);
+
+/* Support get_partition_for_tuple() */
+static void FormPartitionKeyDatum(PartitionDispatch pd,
+                     TupleTableSlot *slot,
+                     EState *estate,
+                     Datum *values,
+                     bool *isnull);
+
+/*
+ * RelationBuildPartitionDesc
+ *     Form rel's partition descriptor
+ *
+ * Not flushed from the cache by RelationClearRelation() unless changed because
+ * of addition or removal of partition.
+ */
+void
+RelationBuildPartitionDesc(Relation rel)
+{
+   List       *inhoids,
+              *partoids;
+   Oid        *oids = NULL;
+   List       *boundspecs = NIL;
+   ListCell   *cell;
+   int         i,
+               nparts;
+   PartitionKey key = RelationGetPartitionKey(rel);
+   PartitionDesc result;
+   MemoryContext oldcxt;
+
+   int         ndatums = 0;
+
+   /* List partitioning specific */
+   PartitionListValue **all_values = NULL;
+   bool        found_null = false;
+   int         null_index = -1;
+
+   /* Range partitioning specific */
+   PartitionRangeBound **rbounds = NULL;
+
+   /*
+    * The following could happen in situations where rel has a pg_class entry
+    * but not the pg_partitioned_table entry yet.
+    */
+   if (key == NULL)
+       return;
+
+   /* Get partition oids from pg_inherits */
+   inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
+
+   /* Collect bound spec nodes in a list */
+   i = 0;
+   partoids = NIL;
+   foreach(cell, inhoids)
+   {
+       Oid         inhrelid = lfirst_oid(cell);
+       HeapTuple   tuple;
+       Datum       datum;
+       bool        isnull;
+       Node       *boundspec;
+
+       tuple = SearchSysCache1(RELOID, inhrelid);
+
+       /*
+        * It is possible that the pg_class tuple of a partition has not been
+        * updated yet to set its relpartbound field.  The only case where
+        * this happens is when we open the parent relation to check using its
+        * partition descriptor that a new partition's bound does not overlap
+        * some existing partition.
+        */
+       if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
+       {
+           ReleaseSysCache(tuple);
+           continue;
+       }
+
+       datum = SysCacheGetAttr(RELOID, tuple,
+                               Anum_pg_class_relpartbound,
+                               &isnull);
+       Assert(!isnull);
+       boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
+       boundspecs = lappend(boundspecs, boundspec);
+       partoids = lappend_oid(partoids, inhrelid);
+       ReleaseSysCache(tuple);
+   }
+
+   nparts = list_length(partoids);
+
+   if (nparts > 0)
+   {
+       oids = (Oid *) palloc(nparts * sizeof(Oid));
+       i = 0;
+       foreach(cell, partoids)
+           oids[i++] = lfirst_oid(cell);
+
+       /* Convert from node to the internal representation */
+       if (key->strategy == PARTITION_STRATEGY_LIST)
+       {
+           List       *non_null_values = NIL;
+
+           /*
+            * Create a unified list of non-null values across all partitions.
+            */
+           i = 0;
+           found_null = false;
+           null_index = -1;
+           foreach(cell, boundspecs)
+           {
+               ListCell   *c;
+               PartitionBoundSpec *spec = lfirst(cell);
+
+               if (spec->strategy != PARTITION_STRATEGY_LIST)
+                   elog(ERROR, "invalid strategy in partition bound spec");
+
+               foreach(c, spec->listdatums)
+               {
+                   Const      *val = lfirst(c);
+                   PartitionListValue *list_value = NULL;
+
+                   if (!val->constisnull)
+                   {
+                       list_value = (PartitionListValue *)
+                           palloc0(sizeof(PartitionListValue));
+                       list_value->index = i;
+                       list_value->value = val->constvalue;
+                   }
+                   else
+                   {
+                       /*
+                        * Never put a null into the values array, flag
+                        * instead for the code further down below where we
+                        * construct the actual relcache struct.
+                        */
+                       if (found_null)
+                           elog(ERROR, "found null more than once");
+                       found_null = true;
+                       null_index = i;
+                   }
+
+                   if (list_value)
+                       non_null_values = lappend(non_null_values,
+                                                 list_value);
+               }
+
+               i++;
+           }
+
+           ndatums = list_length(non_null_values);
+
+           /*
+            * Collect all list values in one array. Alongside the value, we
+            * also save the index of partition the value comes from.
+            */
+           all_values = (PartitionListValue **) palloc(ndatums *
+                                              sizeof(PartitionListValue *));
+           i = 0;
+           foreach(cell, non_null_values)
+           {
+               PartitionListValue *src = lfirst(cell);
+
+               all_values[i] = (PartitionListValue *)
+                   palloc(sizeof(PartitionListValue));
+               all_values[i]->value = src->value;
+               all_values[i]->index = src->index;
+               i++;
+           }
+
+           qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
+                     qsort_partition_list_value_cmp, (void *) key);
+       }
+       else if (key->strategy == PARTITION_STRATEGY_RANGE)
+       {
+           int         j,
+                       k;
+           PartitionRangeBound **all_bounds,
+                      *prev;
+           bool       *distinct_indexes;
+
+           all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
+                                             sizeof(PartitionRangeBound *));
+           distinct_indexes = (bool *) palloc(2 * nparts * sizeof(bool));
+
+           /*
+            * Create a unified list of range bounds across all the
+            * partitions.
+            */
+           i = j = 0;
+           foreach(cell, boundspecs)
+           {
+               PartitionBoundSpec *spec = lfirst(cell);
+               PartitionRangeBound *lower,
+                          *upper;
+
+               if (spec->strategy != PARTITION_STRATEGY_RANGE)
+                   elog(ERROR, "invalid strategy in partition bound spec");
+
+               lower = make_one_range_bound(key, i, spec->lowerdatums,
+                                            true);
+               upper = make_one_range_bound(key, i, spec->upperdatums,
+                                            false);
+               all_bounds[j] = lower;
+               all_bounds[j + 1] = upper;
+               j += 2;
+               i++;
+           }
+           Assert(j == 2 * nparts);
+
+           /* Sort all the bounds in ascending order */
+           qsort_arg(all_bounds, 2 * nparts,
+                     sizeof(PartitionRangeBound *),
+                     qsort_partition_rbound_cmp,
+                     (void *) key);
+
+           /*
+            * Count the number of distinct bounds to allocate an array of
+            * that size.
+            */
+           ndatums = 0;
+           prev = NULL;
+           for (i = 0; i < 2 * nparts; i++)
+           {
+               PartitionRangeBound *cur = all_bounds[i];
+               bool        is_distinct = false;
+               int         j;
+
+               /* Is current bound is distinct from the previous? */
+               for (j = 0; j < key->partnatts; j++)
+               {
+                   Datum       cmpval;
+
+                   if (prev == NULL)
+                   {
+                       is_distinct = true;
+                       break;
+                   }
+
+                   /*
+                    * If either of them has infinite element, we can't equate
+                    * them.  Even when both are infinite, they'd have
+                    * opposite signs, because only one of cur and prev is a
+                    * lower bound).
+                    */
+                   if (cur->content[j] != RANGE_DATUM_FINITE ||
+                       prev->content[j] != RANGE_DATUM_FINITE)
+                   {
+                       is_distinct = true;
+                       break;
+                   }
+                   cmpval = FunctionCall2Coll(&key->partsupfunc[j],
+                                              key->partcollation[j],
+                                              cur->datums[j],
+                                              prev->datums[j]);
+                   if (DatumGetInt32(cmpval) != 0)
+                   {
+                       is_distinct = true;
+                       break;
+                   }
+               }
+
+               /*
+                * Count the current bound if it is distinct from the previous
+                * one.  Also, store if the index i contains a distinct bound
+                * that we'd like put in the relcache array.
+                */
+               if (is_distinct)
+               {
+                   distinct_indexes[i] = true;
+                   ndatums++;
+               }
+               else
+                   distinct_indexes[i] = false;
+
+               prev = cur;
+           }
+
+           /*
+            * Finally save them in an array from where they will be copied
+            * into the relcache.
+            */
+           rbounds = (PartitionRangeBound **) palloc(ndatums *
+                                             sizeof(PartitionRangeBound *));
+           k = 0;
+           for (i = 0; i < 2 * nparts; i++)
+           {
+               if (distinct_indexes[i])
+                   rbounds[k++] = all_bounds[i];
+           }
+           Assert(k == ndatums);
+       }
+       else
+           elog(ERROR, "unexpected partition strategy: %d",
+                (int) key->strategy);
+   }
+
+   /* Now build the actual relcache partition descriptor */
+   rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
+                                         RelationGetRelationName(rel),
+                                         ALLOCSET_DEFAULT_SIZES);
+   oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
+
+   result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
+   result->nparts = nparts;
+   if (nparts > 0)
+   {
+       PartitionBoundInfo boundinfo;
+       int        *mapping;
+       int         next_index = 0;
+
+       result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
+
+       boundinfo = (PartitionBoundInfoData *)
+           palloc0(sizeof(PartitionBoundInfoData));
+       boundinfo->strategy = key->strategy;
+       boundinfo->ndatums = ndatums;
+       boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+
+       /* Initialize mapping array with invalid values */
+       mapping = (int *) palloc(sizeof(int) * nparts);
+       for (i = 0; i < nparts; i++)
+           mapping[i] = -1;
+
+       switch (key->strategy)
+       {
+           case PARTITION_STRATEGY_LIST:
+               {
+                   boundinfo->has_null = found_null;
+                   boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
+
+                   /*
+                    * Copy values.  Indexes of individual values are mapped
+                    * to canonical values so that they match for any two list
+                    * partitioned tables with same number of partitions and
+                    * same lists per partition.  One way to canonicalize is
+                    * to assign the index in all_values[] of the smallest
+                    * value of each partition, as the index of all of the
+                    * partition's values.
+                    */
+                   for (i = 0; i < ndatums; i++)
+                   {
+                       boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
+                       boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
+                                                       key->parttypbyval[0],
+                                                        key->parttyplen[0]);
+
+                       /* If the old index has no mapping, assign one */
+                       if (mapping[all_values[i]->index] == -1)
+                           mapping[all_values[i]->index] = next_index++;
+
+                       boundinfo->indexes[i] = mapping[all_values[i]->index];
+                   }
+
+                   /*
+                    * If null-accepting partition has no mapped index yet,
+                    * assign one.  This could happen if such partition
+                    * accepts only null and hence not covered in the above
+                    * loop which only handled non-null values.
+                    */
+                   if (found_null)
+                   {
+                       Assert(null_index >= 0);
+                       if (mapping[null_index] == -1)
+                           mapping[null_index] = next_index++;
+                   }
+
+                   /* All partition must now have a valid mapping */
+                   Assert(next_index == nparts);
+
+                   if (found_null)
+                       boundinfo->null_index = mapping[null_index];
+                   else
+                       boundinfo->null_index = -1;
+                   break;
+               }
+
+           case PARTITION_STRATEGY_RANGE:
+               {
+                   boundinfo->content = (RangeDatumContent **) palloc(ndatums *
+                                               sizeof(RangeDatumContent *));
+                   boundinfo->indexes = (int *) palloc((ndatums + 1) *
+                                                       sizeof(int));
+
+                   for (i = 0; i < ndatums; i++)
+                   {
+                       int         j;
+
+                       boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
+                                                             sizeof(Datum));
+                       boundinfo->content[i] = (RangeDatumContent *)
+                           palloc(key->partnatts *
+                                  sizeof(RangeDatumContent));
+                       for (j = 0; j < key->partnatts; j++)
+                       {
+                           if (rbounds[i]->content[j] == RANGE_DATUM_FINITE)
+                               boundinfo->datums[i][j] =
+                                   datumCopy(rbounds[i]->datums[j],
+                                             key->parttypbyval[j],
+                                             key->parttyplen[j]);
+                           /* Remember, we are storing the tri-state value. */
+                           boundinfo->content[i][j] = rbounds[i]->content[j];
+                       }
+
+                       /*
+                        * There is no mapping for invalid indexes.
+                        *
+                        * Any lower bounds in the rbounds array have invalid
+                        * indexes assigned, because the values between the
+                        * previous bound (if there is one) and this (lower)
+                        * bound are not part of the range of any existing
+                        * partition.
+                        */
+                       if (rbounds[i]->lower)
+                           boundinfo->indexes[i] = -1;
+                       else
+                       {
+                           int         orig_index = rbounds[i]->index;
+
+                           /* If the old index is has no mapping, assign one */
+                           if (mapping[orig_index] == -1)
+                               mapping[orig_index] = next_index++;
+
+                           boundinfo->indexes[i] = mapping[orig_index];
+                       }
+                   }
+                   boundinfo->indexes[i] = -1;
+                   break;
+               }
+
+           default:
+               elog(ERROR, "unexpected partition strategy: %d",
+                    (int) key->strategy);
+       }
+
+       result->boundinfo = boundinfo;
+
+       /*
+        * Now assign OIDs from the original array into mapped indexes of the
+        * result array.  Order of OIDs in the former is defined by the
+        * catalog scan that retrived them, whereas that in the latter is
+        * defined by canonicalized representation of the list values or the
+        * range bounds.
+        */
+       for (i = 0; i < nparts; i++)
+           result->oids[mapping[i]] = oids[i];
+       pfree(mapping);
+   }
+
+   MemoryContextSwitchTo(oldcxt);
+   rel->rd_partdesc = result;
+}
+
+/*
+ * partition_bounds_equal
+ *
+ * Are two partition bound collections logically equal?
+ *
+ * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
+ * This is also useful when b1 and b2 are bound collections of two separate
+ * relations, respectively, because PartitionBoundInfo is a canonical
+ * representation of partition bounds.
+ *
+ * 'key' supplies the number of key columns and the per-column comparison
+ * support functions and collations.  Returns true only if strategy, number
+ * of datums, null handling, every datum (and, for range bounds, its
+ * finite/infinite content flag) and every partition index match.
+ */
+bool
+partition_bounds_equal(PartitionKey key,
+                      PartitionBoundInfo b1, PartitionBoundInfo b2)
+{
+   int         i;
+
+   if (b1->strategy != b2->strategy)
+       return false;
+
+   if (b1->ndatums != b2->ndatums)
+       return false;
+
+   if (b1->has_null != b2->has_null)
+       return false;
+
+   if (b1->null_index != b2->null_index)
+       return false;
+
+   for (i = 0; i < b1->ndatums; i++)
+   {
+       int         j;
+
+       for (j = 0; j < key->partnatts; j++)
+       {
+           int32       cmpval;
+
+           /* Range partitions can have infinite datums */
+           if (b1->content != NULL)
+           {
+               /*
+                * A finite bound always differs from an infinite one, and
+                * the two kinds of infinity differ from each other.
+                */
+               if (b1->content[i][j] != b2->content[i][j])
+                   return false;
+
+               /*
+                * The datum slot of a non-finite bound is never filled in,
+                * so it must not be handed to the comparison function; the
+                * matching content flags already establish equality.
+                */
+               if (b1->content[i][j] != RANGE_DATUM_FINITE)
+                   continue;
+           }
+
+           cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[j],
+                                                    key->partcollation[j],
+                                                    b1->datums[i][j],
+                                                    b2->datums[i][j]));
+           if (cmpval != 0)
+               return false;
+       }
+
+       if (b1->indexes[i] != b2->indexes[i])
+           return false;
+   }
+
+   /* There are ndatums+1 indexes in case of range partitions */
+   if (key->strategy == PARTITION_STRATEGY_RANGE &&
+       b1->indexes[i] != b2->indexes[i])
+       return false;
+
+   return true;
+}
+
+/*
+ * check_new_partition_bound
+ *
+ * Checks if the new partition's bound overlaps any of the existing partitions
+ * of parent.  Also performs additional checks as necessary per strategy.
+ *
+ * 'relname' is the name of the new partition (used only in error messages),
+ * 'parent' is the partitioned table and 'bound' is the new partition's
+ * PartitionBoundSpec.  Nothing is returned; an error is raised if the bound
+ * describes an empty range or overlaps an existing partition.
+ */
+void
+check_new_partition_bound(char *relname, Relation parent, Node *bound)
+{
+   PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
+   PartitionKey key = RelationGetPartitionKey(parent);
+   PartitionDesc partdesc = RelationGetPartitionDesc(parent);
+   ParseState *pstate = make_parsestate(NULL);
+   int         with = -1;
+   bool        overlap = false;
+
+   switch (key->strategy)
+   {
+       case PARTITION_STRATEGY_LIST:
+           {
+               Assert(spec->strategy == PARTITION_STRATEGY_LIST);
+
+               if (partdesc->nparts > 0)
+               {
+                   PartitionBoundInfo boundinfo = partdesc->boundinfo;
+                   ListCell   *cell;
+
+                   Assert(boundinfo &&
+                          boundinfo->strategy == PARTITION_STRATEGY_LIST &&
+                          (boundinfo->ndatums > 0 || boundinfo->has_null));
+
+                   /*
+                    * The new partition overlaps as soon as any one of its
+                    * values (or NULL) is already accepted by an existing
+                    * partition.
+                    */
+                   foreach(cell, spec->listdatums)
+                   {
+                       Const      *val = lfirst(cell);
+
+                       if (!val->constisnull)
+                       {
+                           int         offset;
+                           bool        equal;
+
+                           offset = partition_bound_bsearch(key, boundinfo,
+                                                            &val->constvalue,
+                                                            true, &equal);
+                           if (offset >= 0 && equal)
+                           {
+                               overlap = true;
+                               with = boundinfo->indexes[offset];
+                               break;
+                           }
+                       }
+                       else if (boundinfo->has_null)
+                       {
+                           overlap = true;
+                           with = boundinfo->null_index;
+                           break;
+                       }
+                   }
+               }
+
+               break;
+           }
+
+       case PARTITION_STRATEGY_RANGE:
+           {
+               PartitionRangeBound *lower,
+                          *upper;
+
+               Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
+               lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
+               upper = make_one_range_bound(key, -1, spec->upperdatums, false);
+
+               /*
+                * First check if the resulting range would be empty with
+                * specified lower and upper bounds
+                */
+               if (partition_rbound_cmp(key, lower->datums, lower->content, true,
+                                        upper) >= 0)
+                   ereport(ERROR,
+                           (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                   errmsg("cannot create range partition with empty range"),
+                            parser_errposition(pstate, spec->location)));
+
+               if (partdesc->nparts > 0)
+               {
+                   PartitionBoundInfo boundinfo = partdesc->boundinfo;
+                   int         off1,
+                               off2;
+                   bool        equal = false;
+
+                   Assert(boundinfo && boundinfo->ndatums > 0 &&
+                          boundinfo->strategy == PARTITION_STRATEGY_RANGE);
+
+                   /*
+                    * Find the greatest index of a range bound that is less
+                    * than or equal with the new lower bound.
+                    */
+                   off1 = partition_bound_bsearch(key, boundinfo, lower, true,
+                                                  &equal);
+
+                   /*
+                    * If equal has been set to true, that means the new lower
+                    * bound is found to be equal with the bound at off1,
+                    * which clearly means an overlap with the partition at
+                    * index off1+1.
+                    *
+                    * Otherwise, check if there is a "gap" that could be
+                    * occupied by the new partition.  In case of a gap, the
+                    * new upper bound should not cross past the upper
+                    * boundary of the gap, that is, off2 == off1 should be
+                    * true.
+                    */
+                   if (!equal && boundinfo->indexes[off1 + 1] < 0)
+                   {
+                       off2 = partition_bound_bsearch(key, boundinfo, upper,
+                                                      true, &equal);
+
+                       if (equal || off1 != off2)
+                       {
+                           overlap = true;
+
+                           /*
+                            * A bound with a negative index is a lower
+                            * bound, so the partition it starts is recorded
+                            * at the following (upper) bound.  Otherwise the
+                            * bound at off2 is itself the upper bound of the
+                            * partition being overlapped.
+                            */
+                           if (boundinfo->indexes[off2] < 0)
+                               with = boundinfo->indexes[off2 + 1];
+                           else
+                               with = boundinfo->indexes[off2];
+                       }
+                   }
+                   else
+                   {
+                       overlap = true;
+                       with = boundinfo->indexes[off1 + 1];
+                   }
+               }
+
+               break;
+           }
+
+       default:
+           elog(ERROR, "unexpected partition strategy: %d",
+                (int) key->strategy);
+   }
+
+   if (overlap)
+   {
+       Assert(with >= 0);
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                errmsg("partition \"%s\" would overlap partition \"%s\"",
+                       relname, get_rel_name(partdesc->oids[with])),
+                parser_errposition(pstate, spec->location)));
+   }
+}
+
+/*
+ * get_partition_parent
+ *
+ * Returns inheritance parent of a partition by scanning pg_inherits
+ *
+ * 'relid' is the OID of the partition.  The scan looks for the pg_inherits
+ * row with inhrelid = relid and inhseqno = 1 and returns its inhparent.
+ *
+ * Note: Because this function assumes that the relation whose OID is passed
+ * as an argument will have precisely one parent, it should only be called
+ * when it is known that the relation is a partition.  An error is raised if
+ * no matching pg_inherits row exists.
+ */
+Oid
+get_partition_parent(Oid relid)
+{
+   Form_pg_inherits form;
+   Relation    catalogRelation;
+   SysScanDesc scan;
+   ScanKeyData key[2];
+   HeapTuple   tuple;
+   Oid         result;
+
+   catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
+
+   ScanKeyInit(&key[0],
+               Anum_pg_inherits_inhrelid,
+               BTEqualStrategyNumber, F_OIDEQ,
+               ObjectIdGetDatum(relid));
+   ScanKeyInit(&key[1],
+               Anum_pg_inherits_inhseqno,
+               BTEqualStrategyNumber, F_INT4EQ,
+               Int32GetDatum(1));
+
+   scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
+                             NULL, 2, key);
+
+   /*
+    * Report a missing row explicitly rather than relying on an Assert, so
+    * that builds without assertions do not dereference an invalid tuple.
+    */
+   tuple = systable_getnext(scan);
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "could not find tuple for parent of relation %u", relid);
+
+   form = (Form_pg_inherits) GETSTRUCT(tuple);
+   result = form->inhparent;
+
+   systable_endscan(scan);
+   heap_close(catalogRelation, AccessShareLock);
+
+   return result;
+}
+
+/*
+ * get_qual_from_partbound
+ *     Build the partition constraint expressions implied by a partition
+ *     bound.
+ *
+ * 'bound' is the parser node (a PartitionBoundSpec) for the bound of 'rel',
+ * and 'parent' is the partitioned table whose partition key it is defined
+ * against.  The result is a list of expressions in which the parent's
+ * attribute numbers have been replaced by those of 'rel'.
+ */
+List *
+get_qual_from_partbound(Relation rel, Relation parent, Node *bound)
+{
+   PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
+   PartitionKey key = RelationGetPartitionKey(parent);
+   TupleDesc   parent_tupdesc = RelationGetDescr(parent);
+   AttrNumber *partition_attnos;
+   List       *my_qual = NIL;
+   bool        found_whole_row;
+   int         natts;
+   int         idx;
+
+   Assert(key != NULL);
+
+   /* Generate the strategy-specific constraint, in terms of parent attnos */
+   if (key->strategy == PARTITION_STRATEGY_LIST)
+   {
+       Assert(spec->strategy == PARTITION_STRATEGY_LIST);
+       my_qual = get_qual_for_list(key, spec);
+   }
+   else if (key->strategy == PARTITION_STRATEGY_RANGE)
+   {
+       Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
+       my_qual = get_qual_for_range(key, spec);
+   }
+   else
+       elog(ERROR, "unexpected partition strategy: %d",
+            (int) key->strategy);
+
+   /*
+    * The Vars produced above are numbered according to the parent's
+    * physical columns, but a partition's columns may be laid out
+    * differently.  Build a parent-attno -> partition-attno map by looking
+    * each live parent column up by name in the partition; entries for
+    * dropped parent columns stay zero.
+    */
+   natts = parent_tupdesc->natts;
+   partition_attnos = (AttrNumber *) palloc0(natts * sizeof(AttrNumber));
+   for (idx = 0; idx < natts; idx++)
+   {
+       Form_pg_attribute att = parent_tupdesc->attrs[idx];
+
+       if (!att->attisdropped)
+           partition_attnos[idx] = get_attnum(RelationGetRelid(rel),
+                                              NameStr(att->attname));
+   }
+
+   /* Renumber the Vars of varno 1 using the map just built */
+   my_qual = (List *) map_variable_attnos((Node *) my_qual,
+                                          1, 0,
+                                          partition_attnos,
+                                          natts,
+                                          &found_whole_row);
+
+   /* a whole-row reference is not expected in a partition key */
+   if (found_whole_row)
+       elog(ERROR, "unexpected whole-row reference found in partition key");
+
+   return my_qual;
+}
+
+/*
+ * RelationGetPartitionQual
+ *
+ * Returns the list of partition quals of 'rel' as produced by
+ * generate_partition_qual(), to which 'recurse' is passed through
+ * unchanged.  Returns NIL when 'rel' is not a partition.
+ */
+List *
+RelationGetPartitionQual(Relation rel, bool recurse)
+{
+   if (rel->rd_rel->relispartition)
+       return generate_partition_qual(rel, recurse);
+
+   /* Not a partition, so there is nothing to generate */
+   return NIL;
+}
+
+/*
+ * Append the first N elements of the OID array 'arr', in order, to the OID
+ * list 'list'.  'list' is assigned to, so it must be a modifiable lvalue.
+ */
+#define OID_ARRAY_TO_LIST(arr, N, list) \
+   do \
+   { \
+       int     oid_idx; \
+       for (oid_idx = 0; oid_idx < (N); oid_idx++) \
+           (list) = lappend_oid((list), (arr)[oid_idx]); \
+   } while (0)
+
+/*
+ * RelationGetPartitionDispatchInfo
+ *     Returns information necessary to route tuples down a partition tree
+ *
+ * All the partitions will be locked with lockmode, unless it is NoLock.
+ * A list of the OIDs of all the leaf partition of rel is returned in
+ * *leaf_part_oids.
+ *
+ * The return value is an array of *num_parted PartitionDispatch objects,
+ * one for 'rel' itself (at position 0) followed by one for each partitioned
+ * table found below it, in breadth-first order.  Within each object,
+ * indexes[j] describes the j'th partition of that table: a non-negative
+ * value is the partition's position in *leaf_part_oids, while a negative
+ * value -n means the partition is itself partitioned and its dispatch
+ * object is at position n of the returned array.
+ */
+PartitionDispatch *
+RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
+                                int *num_parted, List **leaf_part_oids)
+{
+   PartitionDesc rootpartdesc = RelationGetPartitionDesc(rel);
+   PartitionDispatchData **pd;
+   List       *all_parts = NIL,
+              *parted_rels;
+   ListCell   *lc;
+   int         i,
+               k,
+               offset;
+
+   /*
+    * Lock partitions and make a list of the partitioned ones to prepare
+    * their PartitionDispatch objects below.
+    *
+    * Cannot use find_all_inheritors() here, because then the order of OIDs
+    * in parted_rels list would be unknown, which does not help, because we
+    * assign indexes within individual PartitionDispatch in an order that
+    * is predetermined (determined by the order of OIDs in individual
+    * partition descriptors).
+    */
+   *num_parted = 1;
+   parted_rels = list_make1(rel);
+   OID_ARRAY_TO_LIST(rootpartdesc->oids, rootpartdesc->nparts, all_parts);
+   foreach(lc, all_parts)
+   {
+       Relation    partrel = heap_open(lfirst_oid(lc), lockmode);
+       PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
+
+       /*
+        * If this partition is a partitioned table, add its children to the
+        * end of the list, so that they are processed as well.
+        */
+       if (partdesc)
+       {
+           (*num_parted)++;
+           parted_rels = lappend(parted_rels, partrel);
+           OID_ARRAY_TO_LIST(partdesc->oids, partdesc->nparts, all_parts);
+       }
+       else
+           heap_close(partrel, NoLock);
+
+       /*
+        * We keep the partitioned ones open until we're done using the
+        * information being collected here (for example, see
+        * ExecEndModifyTable).
+        */
+   }
+
+   /* Generate PartitionDispatch objects for all partitioned tables */
+   pd = (PartitionDispatchData **) palloc(*num_parted *
+                                          sizeof(PartitionDispatchData *));
+   *leaf_part_oids = NIL;
+   i = k = offset = 0;
+   foreach(lc, parted_rels)
+   {
+       Relation    partrel = lfirst(lc);
+       PartitionKey partkey = RelationGetPartitionKey(partrel);
+       PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
+       int         j,
+                   m;
+
+       pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
+       pd[i]->reldesc = partrel;
+       pd[i]->key = partkey;
+       pd[i]->keystate = NIL;
+       pd[i]->partdesc = partdesc;
+       pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
+
+       m = 0;
+       for (j = 0; j < partdesc->nparts; j++)
+       {
+           Oid         partrelid = partdesc->oids[j];
+
+           if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
+           {
+               *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
+               pd[i]->indexes[j] = k++;
+           }
+           else
+           {
+               /*
+                * parted_rels holds the root followed by every partitioned
+                * child in the order it was discovered above.  offset is
+                * the number of partitioned children of the tables already
+                * processed, so the m'th partitioned child of this table is
+                * at position 1 + offset + m.  (Using the table's own
+                * position i here would be right only for the root.)
+                */
+               pd[i]->indexes[j] = -(1 + offset + m);
+               m++;
+           }
+       }
+       i++;
+
+       /* Account for the partitioned children of the table just handled */
+       offset += m;
+   }
+
+   return pd;
+}
+
+/* Module-local functions */
+
+/*
+ * get_qual_for_list
+ *
+ * Returns a list of expressions to use as a list partition's constraint.
+ *
+ * 'key' is the parent's partition key and 'spec' the partition's bound
+ * specification; 'spec' is not modified.  If the bound's value list does
+ * not contain NULL, the result is the two-element list
+ * (keyCol IS NOT NULL, keyCol = ANY (values)); otherwise it is the
+ * one-element list (keyCol IS NULL OR keyCol = ANY (values)).  In both
+ * cases 'values' holds only the non-null values of the bound.
+ */
+static List *
+get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
+{
+   List       *result;
+   List       *elems = NIL;
+   ArrayExpr  *arr;
+   ScalarArrayOpExpr *opexpr;
+   ListCell   *cell;
+   Node       *keyCol;
+   Oid         operoid;
+   bool        need_relabel,
+               list_has_null = false;
+   NullTest   *nulltest;
+
+   /* Left operand is either a simple Var or arbitrary expression */
+   if (key->partattrs[0] != 0)
+       keyCol = (Node *) makeVar(1,
+                                 key->partattrs[0],
+                                 key->parttypid[0],
+                                 key->parttypmod[0],
+                                 key->parttypcoll[0],
+                                 0);
+   else
+       keyCol = (Node *) copyObject(linitial(key->partexprs));
+
+   /*
+    * Collect the non-null values into a list of our own; NULL is handled
+    * separately below.  The caller's list is deliberately left untouched,
+    * so that the bound specification still mentions NULL afterwards.
+    */
+   foreach(cell, spec->listdatums)
+   {
+       Const      *val = (Const *) lfirst(cell);
+
+       if (val->constisnull)
+           list_has_null = true;
+       else
+           elems = lappend(elems, val);
+   }
+
+   /*
+    * Gin up a null test on the key column: col IS NOT NULL, to be AND'd
+    * with the other expression, if NULL is not an accepted value; col IS
+    * NULL, to be OR'd with it, if it is.
+    */
+   nulltest = makeNode(NullTest);
+   nulltest->arg = (Expr *) keyCol;
+   nulltest->nulltesttype = list_has_null ? IS_NULL : IS_NOT_NULL;
+   nulltest->argisrow = false;
+   nulltest->location = -1;
+
+   /* Right operand is an ArrayExpr containing this partition's values */
+   arr = makeNode(ArrayExpr);
+   arr->array_typeid = !type_is_array(key->parttypid[0])
+       ? get_array_type(key->parttypid[0])
+       : key->parttypid[0];
+   arr->array_collid = key->parttypcoll[0];
+   arr->element_typeid = key->parttypid[0];
+   arr->elements = elems;
+   arr->multidims = false;
+   arr->location = -1;
+
+   /* Get the correct btree equality operator */
+   operoid = get_partition_operator(key, 0, BTEqualStrategyNumber,
+                                    &need_relabel);
+   if (need_relabel || key->partcollation[0] != key->parttypcoll[0])
+       keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+                                         key->partopcintype[0],
+                                         -1,
+                                         key->partcollation[0],
+                                         COERCE_EXPLICIT_CAST);
+
+   /* Build leftop = ANY (rightop) */
+   opexpr = makeNode(ScalarArrayOpExpr);
+   opexpr->opno = operoid;
+   opexpr->opfuncid = get_opcode(operoid);
+   opexpr->useOr = true;
+   opexpr->inputcollid = key->partcollation[0];
+   opexpr->args = list_make2(keyCol, arr);
+   opexpr->location = -1;
+
+   if (list_has_null)
+   {
+       Expr       *or;
+
+       or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
+       result = list_make1(or);
+   }
+   else
+       result = list_make2(nulltest, opexpr);
+
+   return result;
+}
+
+/*
+ * get_qual_for_range
+ *
+ * Get a list of OpExpr's to use as a range partition's constraint.
+ */
+static List *
+get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
+{
+   List       *result = NIL;
+   ListCell   *cell1,
+              *cell2,
+              *partexprs_item;
+   int         i;
+
+   /*
+    * Iterate over columns of the key, emitting an OpExpr for each using the
+    * corresponding lower and upper datums as constant operands.
+    */
+   i = 0;
+   partexprs_item = list_head(key->partexprs);
+   forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
+   {
+       PartitionRangeDatum *ldatum = lfirst(cell1),
+                  *udatum = lfirst(cell2);
+       Node       *keyCol;
+       Const      *lower_val = NULL,
+                  *upper_val = NULL;
+       EState     *estate;
+       MemoryContext oldcxt;
+       Expr       *test_expr;
+       ExprState  *test_exprstate;
+       Datum       test_result;
+       bool        isNull;
+       bool        need_relabel = false;
+       Oid         operoid;
+       NullTest   *nulltest;
+
+       /* Left operand */
+       if (key->partattrs[i] != 0)
+       {
+           keyCol = (Node *) makeVar(1,
+                                     key->partattrs[i],
+                                     key->parttypid[i],
+                                     key->parttypmod[i],
+                                     key->parttypcoll[i],
+                                     0);
+       }
+       else
+       {
+           keyCol = (Node *) copyObject(lfirst(partexprs_item));
+           partexprs_item = lnext(partexprs_item);
+       }
+
+       /*
+        * Emit a IS NOT NULL expression for non-Var keys, because whereas
+        * simple attributes are covered by NOT NULL constraints, expression
+        * keys are still nullable which is not acceptable in case of range
+        * partitioning.
+        */
+       if (!IsA(keyCol, Var))
+       {
+           nulltest = makeNode(NullTest);
+           nulltest->arg = (Expr *) keyCol;
+           nulltest->nulltesttype = IS_NOT_NULL;
+           nulltest->argisrow = false;
+           nulltest->location = -1;
+           result = lappend(result, nulltest);
+       }
+
+       /*
+        * Stop at this column if either of lower or upper datum is infinite,
+        * but do emit an OpExpr for the non-infinite datum.
+        */
+       if (!ldatum->infinite)
+           lower_val = (Const *) ldatum->value;
+       if (!udatum->infinite)
+           upper_val = (Const *) udatum->value;
+
+       /*
+        * If lower_val and upper_val are both finite and happen to be equal,
+        * emit only (keyCol = lower_val) for this column, because all rows in
+        * this partition could only ever contain this value (ie, lower_val)
+        * in the current partitioning column.  We must consider further
+        * columns because the above condition does not fully constrain the
+        * rows of this partition.
+        */
+       if (lower_val && upper_val)
+       {
+           /* Get the correct btree equality operator for the test */
+           operoid = get_partition_operator(key, i, BTEqualStrategyNumber,
+                                            &need_relabel);
+
+           /* Create the test expression */
+           estate = CreateExecutorState();
+           oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
+           test_expr = make_opclause(operoid,
+                                     BOOLOID,
+                                     false,
+                                     (Expr *) lower_val,
+                                     (Expr *) upper_val,
+                                     InvalidOid,
+                                     key->partcollation[i]);
+           fix_opfuncids((Node *) test_expr);
+           test_exprstate = ExecInitExpr(test_expr, NULL);
+           test_result = ExecEvalExprSwitchContext(test_exprstate,
+                                             GetPerTupleExprContext(estate),
+                                                   &isNull, NULL);
+           MemoryContextSwitchTo(oldcxt);
+           FreeExecutorState(estate);
+
+           if (DatumGetBool(test_result))
+           {
+               /* This can never be, but it's better to make sure */
+               if (i == key->partnatts - 1)
+                   elog(ERROR, "invalid range bound specification");
+
+               if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
+                   keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+                                                     key->partopcintype[i],
+                                                     -1,
+                                                     key->partcollation[i],
+                                                     COERCE_EXPLICIT_CAST);
+               result = lappend(result,
+                                make_opclause(operoid,
+                                              BOOLOID,
+                                              false,
+                                              (Expr *) keyCol,
+                                              (Expr *) lower_val,
+                                              InvalidOid,
+                                              key->partcollation[i]));
+
+               /* Go over to consider the next column. */
+               i++;
+               continue;
+           }
+       }
+
+       /*
+        * We can say here that lower_val != upper_val.  Emit expressions
+        * (keyCol >= lower_val) and (keyCol < upper_val), then stop.
+        */
+       if (lower_val)
+       {
+           operoid = get_partition_operator(key, i,
+                                            BTGreaterEqualStrategyNumber,
+                                            &need_relabel);
+
+           if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
+               keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+                                                 key->partopcintype[i],
+                                                 -1,
+                                                 key->partcollation[i],
+                                                 COERCE_EXPLICIT_CAST);
+           result = lappend(result,
+                            make_opclause(operoid,
+                                          BOOLOID,
+                                          false,
+                                          (Expr *) keyCol,
+                                          (Expr *) lower_val,
+                                          InvalidOid,
+                                          key->partcollation[i]));
+       }
+
+       if (upper_val)
+       {
+           operoid = get_partition_operator(key, i,
+                                            BTLessStrategyNumber,
+                                            &need_relabel);
+
+           if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
+               keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+                                                 key->partopcintype[i],
+                                                 -1,
+                                                 key->partcollation[i],
+                                                 COERCE_EXPLICIT_CAST);
+
+           result = lappend(result,
+                            make_opclause(operoid,
+                                          BOOLOID,
+                                          false,
+                                          (Expr *) keyCol,
+                                          (Expr *) upper_val,
+                                          InvalidOid,
+                                          key->partcollation[i]));
+       }
+
+       /*
+        * We can stop at this column, because we would not have checked the
+        * next column when routing a given row into this partition.
+        */
+       break;
+   }
+
+   return result;
+}
+
+/*
+ * get_partition_operator
+ *
+ * Look up the operator implementing 'strategy' for partition key column
+ * 'col' within that column's partitioning operator family, and return its
+ * OID.
+ *
+ * The operator taking the column's own type on both sides is preferred.  If
+ * the family has no such member, the operator taking the operator class's
+ * declared input type is used instead.  *need_relabel is set to true in the
+ * latter case and to false in the former.  An error is raised if neither
+ * operator exists.
+ */
+static Oid
+get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
+                      bool *need_relabel)
+{
+   Oid         opfamily = key->partopfamily[col];
+   Oid         coltype = key->parttypid[col];
+   Oid         opcintype = key->partopcintype[col];
+   Oid         operoid;
+
+   /* Preferred: an operator declared directly on the column's type. */
+   operoid = get_opfamily_member(opfamily, coltype, coltype, strategy);
+   if (OidIsValid(operoid))
+   {
+       *need_relabel = false;
+       return operoid;
+   }
+
+   /*
+    * Otherwise fall back to the operator in the same operator family that
+    * takes the operator class's declared input type.  That is OK, because
+    * the column's type is known to be binary-coercible with the operator
+    * class input type (otherwise, the operator class in question would not
+    * have been accepted as the partitioning operator class).  The caller is
+    * told to wrap the non-Const expression with a RelabelType node to denote
+    * the implicit coercion, so that the resulting expression structurally
+    * matches similarly processed expressions within the optimizer.
+    */
+   operoid = get_opfamily_member(opfamily, opcintype, opcintype, strategy);
+   *need_relabel = true;
+
+   if (!OidIsValid(operoid))
+       elog(ERROR, "could not find operator for partitioning");
+
+   return operoid;
+}
+
+/*
+ * generate_partition_qual
+ *
+ * Generate partition predicate from rel's partition bound expression
+ *
+ * If 'recurse' is true and rel's parent is itself a partition, the parent's
+ * (recursively generated) quals are placed ahead of rel's own quals in the
+ * returned list.  The caller never receives the cached list itself: it gets
+ * either a copy of it or the freshly built list.
+ *
+ * rel's own qual is cached in rel->rd_partcheck, which is stored in
+ * CacheMemoryContext to ensure it survives as long as the relcache entry.
+ * But we should be running in a less long-lived working context.  To avoid
+ * leaking cache memory if this routine fails partway through, we build in
+ * working memory and then copy the completed structure into cache memory.
+ */
+static List *
+generate_partition_qual(Relation rel, bool recurse)
+{
+   HeapTuple   tuple;
+   MemoryContext oldcxt;
+   Datum       boundDatum;
+   bool        isnull;
+   Node       *bound;
+   List       *my_qual = NIL,
+              *result = NIL;
+   Relation    parent;
+
+   /* Guard against stack overflow due to overly deep partition tree */
+   check_stack_depth();
+
+   /* Grab at least an AccessShareLock on the parent table */
+   parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
+                      AccessShareLock);
+
+   /* Quick copy: rel's own qual has already been built and cached */
+   if (rel->rd_partcheck)
+   {
+       if (parent->rd_rel->relispartition && recurse)
+           result = list_concat(generate_partition_qual(parent, true),
+                                copyObject(rel->rd_partcheck));
+       else
+           result = copyObject(rel->rd_partcheck);
+
+       /*
+        * NOTE(review): this path releases the parent lock right away,
+        * whereas the path below keeps it until commit; confirm that the
+        * asymmetry is intended.
+        */
+       heap_close(parent, AccessShareLock);
+       return result;
+   }
+
+   /* Get pg_class.relpartbound */
+   if (!rel->rd_rel->relispartition)   /* should not happen */
+       elog(ERROR, "relation \"%s\" has relispartition = false",
+            RelationGetRelationName(rel));
+   tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(RelationGetRelid(rel)));
+   /* Don't hand an invalid tuple to SysCacheGetAttr/ReleaseSysCache */
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "cache lookup failed for relation %u",
+            RelationGetRelid(rel));
+   boundDatum = SysCacheGetAttr(RELOID, tuple,
+                                Anum_pg_class_relpartbound,
+                                &isnull);
+   if (isnull)                 /* should not happen */
+       elog(ERROR, "relation \"%s\" has relpartbound = null",
+            RelationGetRelationName(rel));
+   bound = stringToNode(TextDatumGetCString(boundDatum));
+   ReleaseSysCache(tuple);
+
+   my_qual = get_qual_from_partbound(rel, parent, bound);
+
+   /* If requested, add parent's quals to the list (if any) */
+   if (parent->rd_rel->relispartition && recurse)
+   {
+       List       *parent_check;
+
+       parent_check = generate_partition_qual(parent, true);
+       result = list_concat(parent_check, my_qual);
+   }
+   else
+       result = my_qual;
+
+   /* Save a copy of my_qual (rel's own qual only) in the relcache */
+   oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+   rel->rd_partcheck = copyObject(my_qual);
+   MemoryContextSwitchTo(oldcxt);
+
+   /* Keep the parent locked until commit */
+   heap_close(parent, NoLock);
+
+   return result;
+}
+
+/* ----------------
+ *     FormPartitionKeyDatum
+ *         Compute the partition key of a tuple, filling values[] and
+ *         isnull[] with one entry per partition key column.
+ *
+ * pd              partition dispatch info; supplies the partition key and
+ *                 holds the expression evaluation state (pd->keystate),
+ *                 which is set up here on first use
+ * slot            slot holding the tuple from which to extract the key
+ * estate          executor state for evaluating any partition key
+ *                 expressions (must be non-NULL)
+ * values          Array of partition key Datums (output area)
+ * isnull          Array of is-null indicators (output area)
+ *
+ * the ecxt_scantuple slot of estate's per-tuple expr context must point to
+ * the slot passed in.
+ * ----------------
+ */
+static void
+FormPartitionKeyDatum(PartitionDispatch pd,
+                     TupleTableSlot *slot,
+                     EState *estate,
+                     Datum *values,
+                     bool *isnull)
+{
+   PartitionKey key = pd->key;
+   ListCell   *next_expr;
+   int         keyno;
+
+   /* On first use, build the evaluation state for the key expressions */
+   if (key->partexprs != NIL && pd->keystate == NIL)
+   {
+       /* Check caller has set up context correctly */
+       Assert(estate != NULL &&
+              GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
+
+       pd->keystate = (List *) ExecPrepareExpr((Expr *) key->partexprs,
+                                               estate);
+   }
+
+   next_expr = list_head(pd->keystate);
+   keyno = 0;
+   while (keyno < key->partnatts)
+   {
+       AttrNumber  attno = key->partattrs[keyno];
+
+       if (attno == 0)
+       {
+           /* Expression key: evaluate the next prepared expression */
+           ExprState  *exprstate;
+
+           if (next_expr == NULL)
+               elog(ERROR, "wrong number of partition key expressions");
+           exprstate = (ExprState *) lfirst(next_expr);
+           values[keyno] = ExecEvalExprSwitchContext(exprstate,
+                                             GetPerTupleExprContext(estate),
+                                                     &isnull[keyno],
+                                                     NULL);
+           next_expr = lnext(next_expr);
+       }
+       else
+       {
+           /* Plain column key: fetch the attribute straight from the slot */
+           values[keyno] = slot_getattr(slot, attno, &isnull[keyno]);
+       }
+
+       keyno++;
+   }
+
+   /* Every prepared expression must have been consumed by some key column */
+   if (next_expr != NULL)
+       elog(ERROR, "wrong number of partition key expressions");
+}
+
+/*
+ * get_partition_for_tuple
+ *     Finds a leaf partition for tuple contained in *slot
+ *
+ * pd is an array of PartitionDispatch objects; pd[0] describes the root
+ * partitioned table, where routing starts.  A negative entry in a table's
+ * indexes[] array names (negated) the pd[] element of a sub-partitioned
+ * table to descend into; a non-negative entry is the sequence number of a
+ * leaf partition.
+ *
+ * Returned value is the sequence number of the leaf partition thus found,
+ * or -1 if no leaf partition is found for the tuple.  *failed_at is set
+ * to the OID of the partitioned table whose partition was not found in
+ * the latter case.
+ *
+ * A null in a range partition key is reported as an error.  A null list
+ * partition key is routed to the null-accepting partition if there is one;
+ * otherwise no partition is found for the tuple.
+ */
+int
+get_partition_for_tuple(PartitionDispatch * pd,
+                       TupleTableSlot *slot,
+                       EState *estate,
+                       Oid *failed_at)
+{
+   PartitionDispatch parent;
+   Datum       values[PARTITION_MAX_KEYS];
+   bool        isnull[PARTITION_MAX_KEYS];
+   int         cur_offset,
+               cur_index;
+   int         i;
+
+   /* start with the root partitioned table */
+   parent = pd[0];
+   while (true)
+   {
+       PartitionKey key = parent->key;
+       PartitionDesc partdesc = parent->partdesc;
+
+       /* Quick exit */
+       if (partdesc->nparts == 0)
+       {
+           *failed_at = RelationGetRelid(parent->reldesc);
+           return -1;
+       }
+
+       /* Extract partition key from tuple */
+       FormPartitionKeyDatum(parent, slot, estate, values, isnull);
+
+       if (key->strategy == PARTITION_STRATEGY_RANGE)
+       {
+           /* Disallow nulls in the range partition key of the tuple */
+           for (i = 0; i < key->partnatts; i++)
+               if (isnull[i])
+                   ereport(ERROR,
+                           (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+                       errmsg("range partition key of row contains null")));
+       }
+
+       if (isnull[0])
+       {
+           /*
+            * Range keys containing nulls were rejected above, so this is a
+            * null list partition key.  It can only map to the null-accepting
+            * list partition; if there is none, no partition matches.  We
+            * must not fall through to the binary search below, because that
+            * would hand the null datum to the comparison function.
+            */
+           if (partdesc->boundinfo->has_null)
+               cur_index = partdesc->boundinfo->null_index;
+           else
+               cur_index = -1;
+       }
+       else
+       {
+           /* Else bsearch in partdesc->boundinfo */
+           bool        equal = false;
+
+           cur_offset = partition_bound_bsearch(key, partdesc->boundinfo,
+                                                values, false, &equal);
+           switch (key->strategy)
+           {
+               case PARTITION_STRATEGY_LIST:
+                   /* Only an exact match on a list value identifies a partition */
+                   if (cur_offset >= 0 && equal)
+                       cur_index = partdesc->boundinfo->indexes[cur_offset];
+                   else
+                       cur_index = -1;
+                   break;
+
+               case PARTITION_STRATEGY_RANGE:
+
+                   /*
+                    * Offset returned is such that the bound at offset is
+                    * found to be less or equal with the tuple. So, the bound
+                    * at offset+1 would be the upper bound.
+                    */
+                   cur_index = partdesc->boundinfo->indexes[cur_offset + 1];
+                   break;
+
+               default:
+                   elog(ERROR, "unexpected partition strategy: %d",
+                        (int) key->strategy);
+           }
+       }
+
+       /*
+        * cur_index < 0 means we failed to find a partition of this parent.
+        * cur_index >= 0 means we either found the leaf partition, or the
+        * next parent to find a partition of.
+        */
+       if (cur_index < 0)
+       {
+           *failed_at = RelationGetRelid(parent->reldesc);
+           return -1;
+       }
+       else if (parent->indexes[cur_index] < 0)
+           parent = pd[-parent->indexes[cur_index]];
+       else
+           break;
+   }
+
+   return parent->indexes[cur_index];
+}
+
+/*
+ * qsort_partition_list_value_cmp
+ *
+ * Sort comparator for list partition bound values.  'a' and 'b' each point
+ * to a PartitionListValue pointer; 'arg' is the PartitionKey, whose first
+ * column's support function and collation perform the comparison.
+ */
+static int32
+qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
+{
+   const PartitionListValue *lhs = *(const PartitionListValue *const *) a;
+   const PartitionListValue *rhs = *(const PartitionListValue *const *) b;
+   PartitionKey key = (PartitionKey) arg;
+   Datum       cmp;
+
+   cmp = FunctionCall2Coll(&key->partsupfunc[0],
+                           key->partcollation[0],
+                           lhs->value,
+                           rhs->value);
+
+   return DatumGetInt32(cmp);
+}
+
+/*
+ * make_one_range_bound
+ *
+ * Build a PartitionRangeBound from a list of PartitionRangeDatum elements.
+ * 'index' is stored in the bound as given, and 'lower' tells whether this is
+ * a lower bound.  An infinite datum is recorded as negative infinity in a
+ * lower bound and as positive infinity in an upper bound; a finite datum
+ * must be a non-null Const, whose value is stored.
+ *
+ * Made into a function because there are multiple sites that want to use
+ * this facility.
+ */
+static PartitionRangeBound *
+make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
+{
+   PartitionRangeBound *bound;
+   ListCell   *lc;
+   int         col = 0;
+
+   bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
+   bound->index = index;
+   bound->lower = lower;
+   bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
+   bound->content = (RangeDatumContent *) palloc0(key->partnatts *
+                                                  sizeof(RangeDatumContent));
+
+   foreach(lc, datums)
+   {
+       PartitionRangeDatum *rdatum = lfirst(lc);
+
+       if (rdatum->infinite)
+       {
+           /* Which infinity depends on the side of the range */
+           if (lower)
+               bound->content[col] = RANGE_DATUM_NEG_INF;
+           else
+               bound->content[col] = RANGE_DATUM_POS_INF;
+       }
+       else
+       {
+           Const      *val = (Const *) rdatum->value;
+
+           bound->content[col] = RANGE_DATUM_FINITE;
+
+           if (val->constisnull)
+               elog(ERROR, "invalid range bound datum");
+           bound->datums[col] = val->constvalue;
+       }
+
+       col++;
+   }
+
+   return bound;
+}
+
+/*
+ * qsort_partition_rbound_cmp
+ *
+ * Used when sorting range bounds across all range partitions.  'a' and 'b'
+ * each point to a PartitionRangeBound pointer; 'arg' is the PartitionKey.
+ * The comparison itself is done by partition_rbound_cmp().
+ */
+static int32
+qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
+{
+   PartitionRangeBound *const *lhs = (PartitionRangeBound *const *) a;
+   PartitionRangeBound *const *rhs = (PartitionRangeBound *const *) b;
+
+   return partition_rbound_cmp((PartitionKey) arg,
+                               (*lhs)->datums,
+                               (*lhs)->content,
+                               (*lhs)->lower,
+                               *rhs);
+}
+
+/*
+ * partition_rbound_cmp
+ *
+ * Return for two range bounds whether the 1st one (specified in datums1,
+ * content1, and lower1) is <, =, > the bound specified in *b2
+ *
+ * The result is negative, zero, or positive, respectively.  Columns are
+ * compared left to right; the first column that is infinite in either bound
+ * or that compares unequal decides the result.
+ */
+static int32
+partition_rbound_cmp(PartitionKey key,
+                    Datum *datums1, RangeDatumContent *content1, bool lower1,
+                    PartitionRangeBound *b2)
+{
+   /* Initialized so the test after the loop never reads an unset value */
+   int32       cmpval = 0;
+   int         i;
+   Datum      *datums2 = b2->datums;
+   RangeDatumContent *content2 = b2->content;
+   bool        lower2 = b2->lower;
+
+   for (i = 0; i < key->partnatts; i++)
+   {
+       /*
+        * First, handle cases involving infinity, which don't require
+        * invoking the comparison proc.
+        */
+       if (content1[i] != RANGE_DATUM_FINITE &&
+           content2[i] != RANGE_DATUM_FINITE)
+
+           /*
+            * Both are infinity, so they are equal unless one is negative
+            * infinity and other positive (or vice versa)
+            */
+           return content1[i] == content2[i] ? 0
+               : (content1[i] < content2[i] ? -1 : 1);
+       else if (content1[i] != RANGE_DATUM_FINITE)
+           return content1[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
+       else if (content2[i] != RANGE_DATUM_FINITE)
+           return content2[i] == RANGE_DATUM_NEG_INF ? 1 : -1;
+
+       /* Both finite: ask the column's comparison support function */
+       cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
+                                                key->partcollation[i],
+                                                datums1[i],
+                                                datums2[i]));
+       if (cmpval != 0)
+           break;
+   }
+
+   /*
+    * If the comparison is anything other than equal, we're done. If they
+    * compare equal though, we still have to consider whether the boundaries
+    * are inclusive or exclusive.  Exclusive one is considered smaller of the
+    * two.
+    */
+   if (cmpval == 0 && lower1 != lower2)
+       cmpval = lower1 ? 1 : -1;
+
+   return cmpval;
+}
+
+/*
+ * partition_rbound_datum_cmp
+ *
+ * Return whether range bound (specified in rb_datums and rb_content) is
+ * <, =, > partition key of tuple (tuple_datums)
+ *
+ * Columns are compared left to right.  An infinite bound column decides the
+ * result immediately: negative infinity sorts below any tuple value and
+ * positive infinity above.
+ */
+static int32
+partition_rbound_datum_cmp(PartitionKey key,
+                          Datum *rb_datums, RangeDatumContent *rb_content,
+                          Datum *tuple_datums)
+{
+   int32       result = -1;
+   int         col = 0;
+
+   while (col < key->partnatts)
+   {
+       /* Infinite bound values never reach the comparison function */
+       if (rb_content[col] != RANGE_DATUM_FINITE)
+       {
+           if (rb_content[col] == RANGE_DATUM_NEG_INF)
+               return -1;
+           return 1;
+       }
+
+       result = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[col],
+                                                key->partcollation[col],
+                                                rb_datums[col],
+                                                tuple_datums[col]));
+
+       /* The first unequal column settles the ordering */
+       if (result != 0)
+           return result;
+
+       col++;
+   }
+
+   return result;
+}
+
+/*
+ * partition_bound_cmp
+ *
+ * Return whether the bound at offset in boundinfo is <, =, > the argument
+ * specified in *probe.
+ *
+ * For list partitioning, *probe is read as a single Datum.  For range
+ * partitioning, *probe is a PartitionRangeBound when probe_is_bound is true
+ * and an array of Datums otherwise.
+ */
+static int32
+partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo,
+                   int offset, void *probe, bool probe_is_bound)
+{
+   Datum      *bound_datums = boundinfo->datums[offset];
+   int32       cmpval = -1;
+
+   if (key->strategy == PARTITION_STRATEGY_LIST)
+   {
+       Datum       probe_value = *(Datum *) probe;
+
+       cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
+                                                key->partcollation[0],
+                                                bound_datums[0],
+                                                probe_value));
+   }
+   else if (key->strategy == PARTITION_STRATEGY_RANGE)
+   {
+       RangeDatumContent *content = boundinfo->content[offset];
+
+       if (!probe_is_bound)
+       {
+           /* Probe is the partition key of a tuple */
+           cmpval = partition_rbound_datum_cmp(key,
+                                               bound_datums, content,
+                                               (Datum *) probe);
+       }
+       else
+       {
+           /*
+            * We need to pass whether the existing bound is a lower bound,
+            * so that two equal-valued lower and upper bounds are not
+            * regarded equal.
+            */
+           bool        lower = boundinfo->indexes[offset] < 0;
+
+           cmpval = partition_rbound_cmp(key,
+                                         bound_datums, content, lower,
+                                         (PartitionRangeBound *) probe);
+       }
+   }
+   else
+       elog(ERROR, "unexpected partition strategy: %d",
+            (int) key->strategy);
+
+   return cmpval;
+}
+
+/*
+ * Binary search on a collection of partition bounds. Returns greatest index
+ * of bound in array boundinfo->datums which is less or equal with *probe.
+ * If all bounds in the array are greater than *probe, -1 is returned.
+ *
+ * *probe could either be a partition bound or a Datum array representing
+ * the partition key of a tuple being routed; probe_is_bound tells which.
+ * We pass that down to the comparison function so that it can interpret the
+ * contents of *probe accordingly.
+ *
+ * *is_equal is set to whether the bound at the returned index is equal with
+ * *probe.  It is always set; in particular it is false when -1 is returned.
+ */
+static int
+partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo,
+                       void *probe, bool probe_is_bound, bool *is_equal)
+{
+   int         lo,
+               hi,
+               mid;
+
+   /* Define the output even if no bound turns out to be <= probe */
+   *is_equal = false;
+
+   lo = -1;
+   hi = boundinfo->ndatums - 1;
+   while (lo < hi)
+   {
+       int32       cmpval;
+
+       /* Round up so that the loop makes progress when hi == lo + 1 */
+       mid = (lo + hi + 1) / 2;
+       cmpval = partition_bound_cmp(key, boundinfo, mid, probe,
+                                    probe_is_bound);
+       if (cmpval <= 0)
+       {
+           /* Bound at mid is <= probe: it is the best candidate so far */
+           lo = mid;
+           *is_equal = (cmpval == 0);
+       }
+       else
+           hi = mid - 1;
+   }
+
+   return lo;
+}
index 8fabe6899f65796e9c4495c6d27dd0ffdc93f7be..724b41e64cdee99c6353f0e605ea10219b9dfbc9 100644 (file)
@@ -368,7 +368,7 @@ CreateConstraintEntry(const char *constraintName,
         */
        recordDependencyOnSingleRelExpr(&conobject, conExpr, relId,
                                        DEPENDENCY_NORMAL,
-                                       DEPENDENCY_NORMAL);
+                                       DEPENDENCY_NORMAL, false);
    }
 
    /* Post creation hook for new constraint */
index c617abb223b07e2ee430e81861896213f6efdd36..f4afcd9aae14be9d13e5e12e949979896c6cb5fd 100644 (file)
@@ -201,7 +201,8 @@ analyze_rel(Oid relid, RangeVar *relation, int options,
     * locked the relation.
     */
    if (onerel->rd_rel->relkind == RELKIND_RELATION ||
-       onerel->rd_rel->relkind == RELKIND_MATVIEW)
+       onerel->rd_rel->relkind == RELKIND_MATVIEW ||
+       onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        /* Regular table, so we'll use the regular row acquisition function */
        acquirefunc = acquire_sample_rows;
@@ -1317,7 +1318,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 
        /* Check table type (MATVIEW can't happen, but might as well allow) */
        if (childrel->rd_rel->relkind == RELKIND_RELATION ||
-           childrel->rd_rel->relkind == RELKIND_MATVIEW)
+           childrel->rd_rel->relkind == RELKIND_MATVIEW ||
+           childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
        {
            /* Regular table, so use the regular row acquisition function */
            acquirefunc = acquire_sample_rows;
index ec5d6f15659f590128f218f373d663ae02e65e2b..270be0af18e4a5b9e75202a306d70cd7a234d599 100644 (file)
@@ -161,6 +161,11 @@ typedef struct CopyStateData
    ExprState **defexprs;       /* array of default att expressions */
    bool        volatile_defexprs;      /* is any of defexprs volatile? */
    List       *range_table;
+   PartitionDispatch      *partition_dispatch_info;
+   int                     num_dispatch;
+   int                     num_partitions;
+   ResultRelInfo          *partitions;
+   TupleConversionMap    **partition_tupconv_maps;
 
    /*
     * These variables are used to reduce overhead in textual COPY FROM.
@@ -1397,6 +1402,71 @@ BeginCopy(ParseState *pstate,
                    (errcode(ERRCODE_UNDEFINED_COLUMN),
                     errmsg("table \"%s\" does not have OIDs",
                            RelationGetRelationName(cstate->rel))));
+
+       /*
+        * Initialize state for CopyFrom tuple routing.  Watch out for
+        * any foreign partitions.
+        */
+       if (is_from && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       {
+           PartitionDispatch *pd;
+           List           *leaf_parts;
+           ListCell       *cell;
+           int             i,
+                           num_parted,
+                           num_leaf_parts;
+           ResultRelInfo  *leaf_part_rri;
+
+           /* Get the tuple-routing information and lock partitions */
+           pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
+                                                 &num_parted, &leaf_parts);
+           num_leaf_parts = list_length(leaf_parts);
+           cstate->partition_dispatch_info = pd;
+           cstate->num_dispatch = num_parted;
+           cstate->num_partitions = num_leaf_parts;
+           cstate->partitions = (ResultRelInfo *) palloc(num_leaf_parts *
+                                                       sizeof(ResultRelInfo));
+           cstate->partition_tupconv_maps = (TupleConversionMap **)
+                       palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+
+           leaf_part_rri = cstate->partitions;
+           i = 0;
+           foreach(cell, leaf_parts)
+           {
+               Relation    partrel;
+
+               /*
+                * We locked all the partitions above including the leaf
+                * partitions.  Note that each of the relations in
+                * cstate->partitions will be closed by CopyFrom() after
+                * it's finished with its processing.
+                */
+               partrel = heap_open(lfirst_oid(cell), NoLock);
+
+               /*
+                * Verify result relation is a valid target for the current
+                * operation.
+                */
+               CheckValidResultRel(partrel, CMD_INSERT);
+
+               InitResultRelInfo(leaf_part_rri,
+                                 partrel,
+                                 1,     /* dummy */
+                                 false, /* no partition constraint check */
+                                 0);
+
+               /* Open partition indices */
+               ExecOpenIndices(leaf_part_rri, false);
+
+               if (!equalTupleDescs(tupDesc, RelationGetDescr(partrel)))
+                   cstate->partition_tupconv_maps[i] =
+                               convert_tuples_by_name(tupDesc,
+                                   RelationGetDescr(partrel),
+                                   gettext_noop("could not convert row type"));
+               leaf_part_rri++;
+               i++;
+           }
+       }
    }
    else
    {
@@ -1751,6 +1821,12 @@ BeginCopyTo(ParseState *pstate,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                     errmsg("cannot copy from sequence \"%s\"",
                            RelationGetRelationName(rel))));
+       else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+           ereport(ERROR,
+                   (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                    errmsg("cannot copy from partitioned table \"%s\"",
+                           RelationGetRelationName(rel)),
+                    errhint("Try the COPY (SELECT ...) TO variant.")));
        else
            ereport(ERROR,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -2249,6 +2325,7 @@ CopyFrom(CopyState cstate)
    Datum      *values;
    bool       *nulls;
    ResultRelInfo *resultRelInfo;
+   ResultRelInfo *saved_resultRelInfo = NULL;
    EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
    ExprContext *econtext;
    TupleTableSlot *myslot;
@@ -2275,6 +2352,7 @@ CopyFrom(CopyState cstate)
     * only hint about them in the view case.)
     */
    if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+       cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
        !(cstate->rel->trigdesc &&
          cstate->rel->trigdesc->trig_insert_instead_row))
    {
@@ -2385,6 +2463,7 @@ CopyFrom(CopyState cstate)
    InitResultRelInfo(resultRelInfo,
                      cstate->rel,
                      1,        /* dummy rangetable index */
+                     true,     /* do load partition check expression */
                      0);
 
    ExecOpenIndices(resultRelInfo, false);
@@ -2407,11 +2486,13 @@ CopyFrom(CopyState cstate)
     * BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
     * expressions. Such triggers or expressions might query the table we're
     * inserting to, and act differently if the tuples that have already been
-    * processed and prepared for insertion are not there.
+    * processed and prepared for insertion are not there.  We also can't
+    * do it if the table is partitioned.
     */
    if ((resultRelInfo->ri_TrigDesc != NULL &&
         (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
          resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
+       cstate->partition_dispatch_info != NULL ||
        cstate->volatile_defexprs)
    {
        useHeapMultiInsert = false;
@@ -2488,6 +2569,59 @@ CopyFrom(CopyState cstate)
        slot = myslot;
        ExecStoreTuple(tuple, slot, InvalidBuffer, false);
 
+       /* Determine the partition to heap_insert the tuple into */
+       if (cstate->partition_dispatch_info)
+       {
+           int     leaf_part_index;
+           TupleConversionMap *map;
+
+           /*
+            * Away we go ... If we end up not finding a partition after all,
+            * ExecFindPartition() does not return and errors out instead.
+            * Otherwise, the returned value is to be used as an index into
+            * cstate->partitions[] and cstate->partition_tupconv_maps[], which
+            * will get us the ResultRelInfo and TupleConversionMap for the
+            * partition, respectively.
+            */
+           leaf_part_index = ExecFindPartition(resultRelInfo,
+                                           cstate->partition_dispatch_info,
+                                               slot,
+                                               estate);
+           Assert(leaf_part_index >= 0 &&
+                  leaf_part_index < cstate->num_partitions);
+
+           /*
+            * Save the old ResultRelInfo and switch to the one corresponding
+            * to the selected partition.
+            */
+           saved_resultRelInfo = resultRelInfo;
+           resultRelInfo = cstate->partitions + leaf_part_index;
+
+           /* We do not yet have a way to insert into a foreign partition */
+           if (resultRelInfo->ri_FdwRoutine)
+               ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("cannot route inserted tuples to a foreign table")));
+
+           /*
+            * For ExecInsertIndexTuples() to work on the partition's indexes
+            */
+           estate->es_result_relation_info = resultRelInfo;
+
+           /*
+            * We might need to convert from the parent rowtype to the
+            * partition rowtype.
+            */
+           map = cstate->partition_tupconv_maps[leaf_part_index];
+           if (map)
+           {
+               tuple = do_convert_tuple(tuple, map);
+               ExecStoreTuple(tuple, slot, InvalidBuffer, true);
+           }
+
+           tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+       }
+
        skip_tuple = false;
 
        /* BEFORE ROW INSERT Triggers */
@@ -2513,7 +2647,8 @@ CopyFrom(CopyState cstate)
            else
            {
                /* Check the constraints of the tuple */
-               if (cstate->rel->rd_att->constr)
+               if (cstate->rel->rd_att->constr ||
+                   resultRelInfo->ri_PartitionCheck)
                    ExecConstraints(resultRelInfo, slot, estate);
 
                if (useHeapMultiInsert)
@@ -2546,7 +2681,8 @@ CopyFrom(CopyState cstate)
                    List       *recheckIndexes = NIL;
 
                    /* OK, store the tuple and create index entries for it */
-                   heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
+                   heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
+                               hi_options, bistate);
 
                    if (resultRelInfo->ri_NumIndices > 0)
                        recheckIndexes = ExecInsertIndexTuples(slot,
@@ -2570,6 +2706,12 @@ CopyFrom(CopyState cstate)
             * tuples inserted by an INSERT command.
             */
            processed++;
+
+           if (saved_resultRelInfo)
+           {
+               resultRelInfo = saved_resultRelInfo;
+               estate->es_result_relation_info = resultRelInfo;
+           }
        }
    }
 
@@ -2607,6 +2749,32 @@ CopyFrom(CopyState cstate)
 
    ExecCloseIndices(resultRelInfo);
 
+   /* Close all the partitioned tables, leaf partitions, and their indices */
+   if (cstate->partition_dispatch_info)
+   {
+       int     i;
+
+       /*
+        * Remember cstate->partition_dispatch_info[0] corresponds to the root
+        * partitioned table, which we must not try to close, because it is
+        * the main target table of COPY that will be closed eventually by
+        * DoCopy().
+        */
+       for (i = 1; i < cstate->num_dispatch; i++)
+       {
+           PartitionDispatch pd = cstate->partition_dispatch_info[i];
+
+           heap_close(pd->reldesc, NoLock);
+       }
+       for (i = 0; i < cstate->num_partitions; i++)
+       {
+           ResultRelInfo *resultRelInfo = cstate->partitions + i;
+
+           ExecCloseIndices(resultRelInfo);
+           heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+       }
+   }
+
    FreeExecutorState(estate);
 
    /*
index 5b4f6affcce20d4b3970feb4149f89cc2ed747fe..d6d52d99295bd20480b3e588c733a15d6c04d6b5 100644 (file)
@@ -112,7 +112,7 @@ create_ctas_internal(List *attrList, IntoClause *into)
     * Create the relation.  (This will error out if there's an existing view,
     * so we don't need more code to complain if "replace" is false.)
     */
-   intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL);
+   intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
 
    /*
     * If necessary, create a TOAST table for the target table.  Note that
index 85817c6530252dc0b2bc361a6342eb943f7e6678..eeb2b1fe80d2b68a5a8346f5dd797313b3008e09 100644 (file)
@@ -69,8 +69,6 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo,
                  char *accessMethodName, Oid accessMethodId,
                  bool amcanorder,
                  bool isconstraint);
-static Oid GetIndexOpClass(List *opclass, Oid attrType,
-               char *accessMethodName, Oid accessMethodId);
 static char *ChooseIndexName(const char *tabname, Oid namespaceId,
                List *colnames, List *exclusionOpNames,
                bool primary, bool isconstraint);
@@ -383,6 +381,11 @@ DefineIndex(Oid relationId,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                     errmsg("cannot create index on foreign table \"%s\"",
                            RelationGetRelationName(rel))));
+       else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+           ereport(ERROR,
+                   (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                    errmsg("cannot create index on partitioned table \"%s\"",
+                           RelationGetRelationName(rel))));
        else
            ereport(ERROR,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -1145,10 +1148,10 @@ ComputeIndexAttrs(IndexInfo *indexInfo,
        /*
         * Identify the opclass to use.
         */
-       classOidP[attn] = GetIndexOpClass(attribute->opclass,
-                                         atttype,
-                                         accessMethodName,
-                                         accessMethodId);
+       classOidP[attn] = ResolveOpClass(attribute->opclass,
+                                        atttype,
+                                        accessMethodName,
+                                        accessMethodId);
 
        /*
         * Identify the exclusion operator, if any.
@@ -1255,10 +1258,13 @@ ComputeIndexAttrs(IndexInfo *indexInfo,
 
 /*
  * Resolve possibly-defaulted operator class specification
+ *
+ * Note: This is used to resolve operator class specification in index and
+ * partition key definitions.
  */
-static Oid
-GetIndexOpClass(List *opclass, Oid attrType,
-               char *accessMethodName, Oid accessMethodId)
+Oid
+ResolveOpClass(List *opclass, Oid attrType,
+              char *accessMethodName, Oid accessMethodId)
 {
    char       *schemaname;
    char       *opcname;
index a0c0d75977b0314d532a7ef92b335a84afc23602..9e62e00b8dc0341d0bc61dcab8e83f1774b4f4a4 100644 (file)
@@ -87,7 +87,7 @@ RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, Oid oldrelid,
                                 * check */
 
    /* Currently, we only allow plain tables to be locked */
-   if (relkind != RELKIND_RELATION)
+   if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table",
index 70e22c100001b776594e404bdcd3df7217716e38..6da3205c9e15d6039ee36f5db6b8c0bdcdcd9edb 100644 (file)
@@ -88,7 +88,7 @@ RangeVarCallbackForPolicy(const RangeVar *rv, Oid relid, Oid oldrelid,
                        rv->relname)));
 
    /* Relation type MUST be a table. */
-   if (relkind != RELKIND_RELATION)
+   if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table", rv->relname)));
@@ -384,7 +384,8 @@ RemovePolicyById(Oid policy_id)
    relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;
 
    rel = heap_open(relid, AccessExclusiveLock);
-   if (rel->rd_rel->relkind != RELKIND_RELATION)
+   if (rel->rd_rel->relkind != RELKIND_RELATION &&
+       rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table",
index 5bd7e124c184d477912a0249466da2e6a7a4ec0e..2b0ae34830179d720a095f719963c464a35ed62f 100644 (file)
@@ -110,7 +110,8 @@ ExecSecLabelStmt(SecLabelStmt *stmt)
                relation->rd_rel->relkind != RELKIND_VIEW &&
                relation->rd_rel->relkind != RELKIND_MATVIEW &&
                relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
-               relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+               relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+               relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
                ereport(ERROR,
                        (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                         errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table",
index 7e37108b8d673d424a8473f21dde3a7f8031f717..d953b4408bddf3a80e7b2807d3d939c1b1db1430 100644 (file)
@@ -234,7 +234,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
    stmt->tablespacename = NULL;
    stmt->if_not_exists = seq->if_not_exists;
 
-   address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL);
+   address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
    seqoid = address.objectId;
    Assert(seqoid != InvalidOid);
 
@@ -1475,7 +1475,8 @@ process_owned_by(Relation seqrel, List *owned_by)
 
        /* Must be a regular or foreign table */
        if (!(tablerel->rd_rel->relkind == RELKIND_RELATION ||
-             tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
+             tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
+             tablerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
            ereport(ERROR,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                     errmsg("referenced relation \"%s\" is not a table or foreign table",
index 6322fa75a76d12cce948779821fb4e3531995aa8..c77b216d4f775ecaed21db596af1d7db6436e5e8 100644 (file)
@@ -29,6 +29,7 @@
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
 #include "catalog/objectaccess.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_constraint.h"
@@ -65,6 +66,9 @@
 #include "nodes/parsenodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/planner.h"
+#include "optimizer/predtest.h"
+#include "optimizer/prep.h"
+#include "optimizer/var.h"
 #include "parser/parse_clause.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_collate.h"
@@ -162,6 +166,7 @@ typedef struct AlteredTableInfo
    Oid         newTableSpace;  /* new tablespace; 0 means no change */
    bool        chgPersistence; /* T if SET LOGGED/UNLOGGED is used */
    char        newrelpersistence;      /* if above is true */
+   List       *partition_constraint; /* for attach partition validation */
    /* Objects to rebuild after completing ALTER TYPE operations */
    List       *changedConstraintOids;  /* OIDs of constraints to rebuild */
    List       *changedConstraintDefs;  /* string definitions of same */
@@ -252,6 +257,12 @@ static const struct dropmsgstrings dropmsgstringarray[] = {
        gettext_noop("foreign table \"%s\" does not exist, skipping"),
        gettext_noop("\"%s\" is not a foreign table"),
    gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")},
+   {RELKIND_PARTITIONED_TABLE,
+       ERRCODE_UNDEFINED_TABLE,
+       gettext_noop("table \"%s\" does not exist"),
+       gettext_noop("table \"%s\" does not exist, skipping"),
+       gettext_noop("\"%s\" is not a table"),
+   gettext_noop("Use DROP TABLE to remove a table.")},
    {'\0', 0, NULL, NULL, NULL, NULL}
 };
 
@@ -272,7 +283,8 @@ struct DropRelationCallbackState
 
 static void truncate_check_rel(Relation rel);
 static List *MergeAttributes(List *schema, List *supers, char relpersistence,
-               List **supOids, List **supconstr, int *supOidCount);
+               bool is_partition, List **supOids, List **supconstr,
+               int *supOidCount);
 static bool MergeCheckConstraint(List *constraints, char *name, Node *expr);
 static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel);
 static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel);
@@ -339,7 +351,9 @@ static void add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid);
 static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid);
 static void ATPrepAddOids(List **wqueue, Relation rel, bool recurse,
              AlterTableCmd *cmd, LOCKMODE lockmode);
+static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing);
 static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode);
+static void ATPrepSetNotNull(Relation rel, bool recurse, bool recursing);
 static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
                 const char *colName, LOCKMODE lockmode);
 static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName,
@@ -433,6 +447,15 @@ static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid,
                                Oid oldRelOid, void *arg);
 static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
                                 Oid oldrelid, void *arg);
+static bool is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr);
+static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy);
+static void ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs,
+                     List **partexprs, Oid *partopclass, Oid *partcollation);
+static void CreateInheritance(Relation child_rel, Relation parent_rel);
+static void RemoveInheritance(Relation child_rel, Relation parent_rel);
+static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel,
+                       PartitionCmd *cmd);
+static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name);
 
 
 /* ----------------------------------------------------------------
@@ -455,7 +478,7 @@ static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
  */
 ObjectAddress
 DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
-              ObjectAddress *typaddress)
+              ObjectAddress *typaddress, const char *queryString)
 {
    char        relname[NAMEDATALEN];
    Oid         namespaceId;
@@ -492,6 +515,14 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("ON COMMIT can only be used on temporary tables")));
 
+   if (stmt->partspec != NULL)
+   {
+       if (relkind != RELKIND_RELATION)
+           elog(ERROR, "unexpected relkind: %d", (int) relkind);
+
+       relkind = RELKIND_PARTITIONED_TABLE;
+   }
+
    /*
     * Look up the namespace in which we are supposed to create the relation,
     * check we have permission to create there, lock it against concurrent
@@ -578,6 +609,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
     */
    schema = MergeAttributes(schema, stmt->inhRelations,
                             stmt->relation->relpersistence,
+                            stmt->partbound != NULL,
                             &inheritOids, &old_constraints, &parentOidCount);
 
    /*
@@ -588,17 +620,33 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
    descriptor = BuildDescForRelation(schema);
 
    /*
-    * Notice that we allow OIDs here only for plain tables, even though some
-    * other relkinds can support them.  This is necessary because the
-    * default_with_oids GUC must apply only to plain tables and not any other
-    * relkind; doing otherwise would break existing pg_dump files.  We could
-    * allow explicit "WITH OIDS" while not allowing default_with_oids to
-    * affect other relkinds, but it would complicate interpretOidsOption().
+    * Notice that we allow OIDs here only for plain tables and partitioned
+    * tables, even though some other relkinds can support them.  This is
+    * necessary because the default_with_oids GUC must apply only to plain
+    * tables and not any other relkind; doing otherwise would break existing
+    * pg_dump files.  We could allow explicit "WITH OIDS" while not allowing
+    * default_with_oids to affect other relkinds, but it would complicate
+    * interpretOidsOption().
     */
    localHasOids = interpretOidsOption(stmt->options,
-                                      (relkind == RELKIND_RELATION));
+                                      (relkind == RELKIND_RELATION ||
+                                       relkind == RELKIND_PARTITIONED_TABLE));
    descriptor->tdhasoid = (localHasOids || parentOidCount > 0);
 
+   if (stmt->partbound)
+   {
+       /* If the parent has OIDs, partitions must have them too. */
+       if (parentOidCount > 0 && !localHasOids)
+           ereport(ERROR,
+                   (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                    errmsg("cannot create table without OIDs as partition of table with OIDs")));
+       /* If the parent doesn't, partitions must not have them. */
+       if (parentOidCount == 0 && localHasOids)
+           ereport(ERROR,
+                   (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                    errmsg("cannot create table with OIDs as partition of table without OIDs")));
+   }
+
    /*
     * Find columns with default values and prepare for insertion of the
     * defaults.  Pre-cooked (that is, inherited) defaults go into a list of
@@ -697,6 +745,110 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
     */
    rel = relation_open(relationId, AccessExclusiveLock);
 
+   /* Process and store partition bound, if any. */
+   if (stmt->partbound)
+   {
+       Node       *bound;
+       ParseState *pstate;
+       Oid         parentId = linitial_oid(inheritOids);
+       Relation    parent;
+
+       /* Already have strong enough lock on the parent */
+       parent = heap_open(parentId, NoLock);
+
+       /*
+        * We are going to try to validate the partition bound specification
+        * against the partition key of parent, so it had better have one.
+        */
+       if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                    errmsg("\"%s\" is not partitioned",
+                           RelationGetRelationName(parent))));
+
+       /* Transform the bound values */
+       pstate = make_parsestate(NULL);
+       pstate->p_sourcetext = queryString;
+       bound = transformPartitionBound(pstate, parent, stmt->partbound);
+
+       /*
+        * Check first that the new partition's bound is valid and does not
+        * overlap with any of existing partitions of the parent - note that
+        * it does not return on error.
+        */
+       check_new_partition_bound(relname, parent, bound);
+       heap_close(parent, NoLock);
+
+       /* Update the pg_class entry. */
+       StorePartitionBound(rel, bound);
+
+       /*
+        * The code that follows may also update the pg_class tuple to update
+        * relnumchecks, so bump up the command counter to avoid the "already
+        * updated by self" error.
+        */
+       CommandCounterIncrement();
+   }
+
+   /*
+    * Process the partitioning specification (if any) and store the
+    * partition key information into the catalog.
+    */
+   if (stmt->partspec)
+   {
+       char            strategy;
+       int             partnatts,
+                       i;
+       AttrNumber      partattrs[PARTITION_MAX_KEYS];
+       Oid             partopclass[PARTITION_MAX_KEYS];
+       Oid             partcollation[PARTITION_MAX_KEYS];
+       List           *partexprs = NIL;
+       List           *cmds = NIL;
+
+       /*
+        * We need to transform the raw parsetrees corresponding to partition
+        * expressions into executable expression trees.  Like column defaults
+        * and CHECK constraints, we could not have done the transformation
+        * earlier.
+        */
+       stmt->partspec = transformPartitionSpec(rel, stmt->partspec,
+                                               &strategy);
+       ComputePartitionAttrs(rel, stmt->partspec->partParams,
+                             partattrs, &partexprs, partopclass,
+                             partcollation);
+
+       partnatts = list_length(stmt->partspec->partParams);
+       StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs,
+                         partopclass, partcollation);
+
+       /* Force key columns to be NOT NULL when using range partitioning */
+       if (strategy == PARTITION_STRATEGY_RANGE)
+       {
+           for (i = 0; i < partnatts; i++)
+           {
+               AttrNumber  partattno = partattrs[i];
+               Form_pg_attribute attform = descriptor->attrs[partattno-1];
+
+               if (partattno != 0 && !attform->attnotnull)
+               {
+                   /* Add a subcommand to make this one NOT NULL */
+                   AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+                   cmd->subtype = AT_SetNotNull;
+                   cmd->name = pstrdup(NameStr(attform->attname));
+                   cmds = lappend(cmds, cmd);
+               }
+           }
+
+           /*
+            * Although there cannot be any partitions yet, we still need to
+            * pass true for recurse; ATPrepSetNotNull() complains if we don't.
+            */
+           if (cmds != NIL)
+               AlterTableInternal(RelationGetRelid(rel), cmds, true);
+       }
+   }
+
    /*
     * Now add any newly specified column default values and CHECK constraints
     * to the new relation.  These are passed to us in the form of raw
@@ -927,6 +1079,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
    HeapTuple   tuple;
    struct DropRelationCallbackState *state;
    char        relkind;
+   char        expected_relkind;
    Form_pg_class classform;
    LOCKMODE    heap_lockmode;
 
@@ -955,7 +1108,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
        return;                 /* concurrently dropped, so nothing to do */
    classform = (Form_pg_class) GETSTRUCT(tuple);
 
-   if (classform->relkind != relkind)
+   /*
+    * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
+    * but RemoveRelations() can only pass one relkind for a given relation.
+    * It chooses RELKIND_RELATION for both regular and partitioned tables.
+    * That means we must be careful before giving the wrong type error when
+    * the relation is RELKIND_PARTITIONED_TABLE.
+    */
+   if (classform->relkind == RELKIND_PARTITIONED_TABLE)
+       expected_relkind = RELKIND_RELATION;
+   else
+       expected_relkind = classform->relkind;
+
+   if (relkind != expected_relkind)
        DropErrorMsgWrongType(rel->relname, classform->relkind, relkind);
 
    /* Allow DROP to either table owner or schema owner */
@@ -1054,6 +1219,10 @@ ExecuteTruncate(TruncateStmt *stmt)
                relids = lappend_oid(relids, childrelid);
            }
        }
+       else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+           ereport(ERROR,
+                   (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                    errmsg("must truncate child tables too")));
    }
 
    /*
@@ -1153,6 +1322,7 @@ ExecuteTruncate(TruncateStmt *stmt)
        InitResultRelInfo(resultRelInfo,
                          rel,
                          0,    /* dummy rangetable index */
+                         false,
                          0);
        resultRelInfo++;
    }
@@ -1293,7 +1463,8 @@ truncate_check_rel(Relation rel)
    AclResult   aclresult;
 
    /* Only allow truncate on regular tables */
-   if (rel->rd_rel->relkind != RELKIND_RELATION)
+   if (rel->rd_rel->relkind != RELKIND_RELATION &&
+       rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table",
@@ -1359,6 +1530,7 @@ storage_name(char c)
  *     of ColumnDef's.) It is destructively changed.
  * 'supers' is a list of names (as RangeVar nodes) of parent relations.
  * 'relpersistence' is a persistence type of the table.
+ * 'is_partition' tells if the table is a partition
  *
  * Output arguments:
  * 'supOids' receives a list of the OIDs of the parent relations.
@@ -1410,7 +1582,8 @@ storage_name(char c)
  */
 static List *
 MergeAttributes(List *schema, List *supers, char relpersistence,
-               List **supOids, List **supconstr, int *supOidCount)
+               bool is_partition, List **supOids, List **supconstr,
+               int *supOidCount)
 {
    ListCell   *entry;
    List       *inhSchema = NIL;
@@ -1420,6 +1593,7 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
    bool        have_bogus_defaults = false;
    int         child_attno;
    static Node bogus_marker = {0};     /* marks conflicting defaults */
+   List       *saved_schema = NIL;
 
    /*
     * Check for and reject tables with too many columns. We perform this
@@ -1438,6 +1612,17 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                 errmsg("tables can have at most %d columns",
                        MaxHeapAttributeNumber)));
 
+   /*
+    * In case of a partition, there are no new column definitions, only
+    * dummy ColumnDefs created for column constraints.  We merge them
+    * into the column definitions inherited from the parent.
+    */
+   if (is_partition)
+   {
+       saved_schema = schema;
+       schema = NIL;
+   }
+
    /*
     * Check for duplicate names in the explicit list of attributes.
     *
@@ -1518,11 +1703,35 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
         * on the parent table, which might otherwise be attempting to clear
         * the parent's relhassubclass field, if its previous children were
         * recently dropped.
+        *
+        * If the child table is a partition, then we instead grab an exclusive
+        * lock on the parent because its partition descriptor will be changed
+        * by addition of the new partition.
+        */
+       if (!is_partition)
+           relation = heap_openrv(parent, ShareUpdateExclusiveLock);
+       else
+           relation = heap_openrv(parent, AccessExclusiveLock);
+
+       /*
+        * We do not allow partitioned tables and partitions to participate
+        * in regular inheritance.
         */
-       relation = heap_openrv(parent, ShareUpdateExclusiveLock);
+       if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+           !is_partition)
+           ereport(ERROR,
+                   (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                    errmsg("cannot inherit from partitioned table \"%s\"",
+                           parent->relname)));
+       if (relation->rd_rel->relispartition && !is_partition)
+           ereport(ERROR,
+                   (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                    errmsg("cannot inherit from partition \"%s\"",
+                           parent->relname)));
 
        if (relation->rd_rel->relkind != RELKIND_RELATION &&
-           relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+           relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+           relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
            ereport(ERROR,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                     errmsg("inherited relation \"%s\" is not a table or foreign table",
@@ -1532,7 +1741,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
            relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
            ereport(ERROR,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
-                    errmsg("cannot inherit from temporary relation \"%s\"",
+                    errmsg(!is_partition
+                           ? "cannot inherit from temporary relation \"%s\""
+                           : "cannot create a permanent relation as partition of temporary relation \"%s\"",
                            parent->relname)));
 
        /* If existing rel is temp, it must belong to this session */
@@ -1540,7 +1751,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
            !relation->rd_islocaltemp)
            ereport(ERROR,
                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
-                    errmsg("cannot inherit from temporary relation of another session")));
+                    errmsg(!is_partition
+                           ? "cannot inherit from temporary relation of another session"
+                           : "cannot create as partition of temporary relation of another session")));
 
        /*
         * We should have an UNDER permission flag for this, but for now,
@@ -1777,9 +1990,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
        pfree(newattno);
 
        /*
-        * Close the parent rel, but keep our ShareUpdateExclusiveLock on it
-        * until xact commit.  That will prevent someone else from deleting or
-        * ALTERing the parent before the child is committed.
+        * Close the parent rel, but keep our lock on it until xact commit.
+        * That will prevent someone else from deleting or ALTERing the parent
+        * before the child is committed.
         */
        heap_close(relation, NoLock);
    }
@@ -1787,7 +2000,8 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
    /*
     * If we had no inherited attributes, the result schema is just the
     * explicitly declared columns.  Otherwise, we need to merge the declared
-    * columns into the inherited schema list.
+    * columns into the inherited schema list.  However, we never have any
+    * explicitly declared columns if the table is a partition.
     */
    if (inhSchema != NIL)
    {
@@ -1815,6 +2029,12 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                Oid         defcollid,
                            newcollid;
 
+               /*
+                * Partitions have only one parent, so conflict should never
+                * occur
+                */
+               Assert(!is_partition);
+
                /*
                 * Yes, try to merge the two column definitions. They must
                 * have the same type, typmod, and collation.
@@ -1896,6 +2116,56 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
                            MaxHeapAttributeNumber)));
    }
 
+   /*
+    * Now that we have the column definition list for a partition, we can
+    * check whether the columns referenced in column option specifications
+    * actually exist.  Also, we merge the options into the corresponding
+    * column definitions.
+    */
+   if (is_partition && list_length(saved_schema) > 0)
+   {
+       schema = list_concat(schema, saved_schema);
+
+       foreach(entry, schema)
+       {
+           ColumnDef  *coldef = lfirst(entry);
+           ListCell   *rest = lnext(entry);
+           ListCell   *prev = entry;
+
+           /*
+            * Partition column option that does not belong to a column from
+            * the parent.  This works because the columns from the parent
+            * come first in the list (see above).
+            */
+           if (coldef->typeName == NULL)
+               ereport(ERROR,
+                   (errcode(ERRCODE_UNDEFINED_COLUMN),
+                    errmsg("column \"%s\" does not exist",
+                           coldef->colname)));
+           while (rest != NULL)
+           {
+               ColumnDef  *restdef = lfirst(rest);
+               ListCell   *next = lnext(rest);     /* rest may get deleted */
+
+               if (strcmp(coldef->colname, restdef->colname) == 0)
+               {
+                   /*
+                    * merge the column options into the column from the
+                    * parent, then unlink the option entry from the list
+                    */
+                   coldef->is_not_null = restdef->is_not_null;
+                   coldef->raw_default = restdef->raw_default;
+                   coldef->cooked_default = restdef->cooked_default;
+                   coldef->constraints = restdef->constraints;
+                   list_delete_cell(schema, rest, prev);
+               }
+               else
+                   prev = rest;    /* advance only past surviving cells */
+               rest = next;
+           }
+       }
+   }
+
    /*
     * If we found any conflicting parent default values, check to make sure
     * they were overridden by the child.
@@ -2166,7 +2436,8 @@ renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing)
        relkind != RELKIND_MATVIEW &&
        relkind != RELKIND_COMPOSITE_TYPE &&
        relkind != RELKIND_INDEX &&
-       relkind != RELKIND_FOREIGN_TABLE)
+       relkind != RELKIND_FOREIGN_TABLE &&
+       relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table, view, materialized view, composite type, index, or foreign table",
@@ -3057,6 +3328,11 @@ AlterTableGetLockLevel(List *cmds)
                cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def);
                break;
 
+           case AT_AttachPartition:
+           case AT_DetachPartition:
+               cmd_lockmode = AccessExclusiveLock;
+               break;
+
            default:            /* oops */
                elog(ERROR, "unrecognized alter table type: %d",
                     (int) cmd->subtype);
@@ -3168,12 +3444,14 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
            break;
        case AT_DropNotNull:    /* ALTER COLUMN DROP NOT NULL */
            ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+           ATPrepDropNotNull(rel, recurse, recursing);
            ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode);
            /* No command-specific prep needed */
            pass = AT_PASS_DROP;
            break;
        case AT_SetNotNull:     /* ALTER COLUMN SET NOT NULL */
            ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+           ATPrepSetNotNull(rel, recurse, recursing);
            ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode);
            /* No command-specific prep needed */
            pass = AT_PASS_ADD_CONSTR;
@@ -3374,6 +3652,12 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
            /* No command-specific prep needed */
            pass = AT_PASS_MISC;
            break;
+       case AT_AttachPartition:
+       case AT_DetachPartition:
+           ATSimplePermissions(rel, ATT_TABLE);
+           /* No command-specific prep needed */
+           pass = AT_PASS_MISC;
+           break;
        default:                /* oops */
            elog(ERROR, "unrecognized alter table type: %d",
                 (int) cmd->subtype);
@@ -3444,7 +3728,14 @@ ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode)
    {
        AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
 
-       if (tab->relkind == RELKIND_RELATION ||
+       /*
+        * If the table is the source table of an ATTACH PARTITION command, we
+        * did not modify anything about it that would change its toasting
+        * requirement, so there is no need to check.
+        */
+       if (((tab->relkind == RELKIND_RELATION ||
+             tab->relkind == RELKIND_PARTITIONED_TABLE) &&
+             tab->partition_constraint == NIL) ||
            tab->relkind == RELKIND_MATVIEW)
            AlterTableCreateToastTable(tab->relid, (Datum) 0, lockmode);
    }
@@ -3693,6 +3984,12 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
        case AT_GenericOptions:
            ATExecGenericOptions(rel, (List *) cmd->def);
            break;
+       case AT_AttachPartition:
+           ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def);
+           break;
+       case AT_DetachPartition:
+           ATExecDetachPartition(rel, ((PartitionCmd *) cmd->def)->name);
+           break;
        default:                /* oops */
            elog(ERROR, "unrecognized alter table type: %d",
                 (int) cmd->subtype);
@@ -3878,7 +4175,8 @@ ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode)
             * Test the current data within the table against new constraints
             * generated by ALTER TABLE commands, but don't rebuild data.
             */
-           if (tab->constraints != NIL || tab->new_notnull)
+           if (tab->constraints != NIL || tab->new_notnull ||
+               tab->partition_constraint != NIL)
                ATRewriteTable(tab, InvalidOid, lockmode);
 
            /*
@@ -3958,6 +4256,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
    CommandId   mycid;
    BulkInsertState bistate;
    int         hi_options;
+   List       *partqualstate = NIL;
 
    /*
     * Open the relation(s).  We have surely already locked the existing
@@ -4022,6 +4321,15 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
        }
    }
 
+   /* Build expression execution states for partition check quals */
+   if (tab->partition_constraint)
+   {
+       needscan = true;
+       partqualstate = (List *)
+                       ExecPrepareExpr((Expr *) tab->partition_constraint,
+                                       estate);
+   }
+
    foreach(l, tab->newvals)
    {
        NewColumnValue *ex = lfirst(l);
@@ -4211,6 +4519,11 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
                }
            }
 
+           if (partqualstate && !ExecQual(partqualstate, econtext, true))
+               ereport(ERROR,
+                       (errcode(ERRCODE_CHECK_VIOLATION),
+                        errmsg("partition constraint is violated by some row")));
+
            /* Write the tuple out to the new relation */
            if (newrel)
                heap_insert(newrel, tuple, mycid, hi_options, bistate);
@@ -4291,6 +4604,7 @@ ATSimplePermissions(Relation rel, int allowed_targets)
    switch (rel->rd_rel->relkind)
    {
        case RELKIND_RELATION:
+       case RELKIND_PARTITIONED_TABLE:
            actual_target = ATT_TABLE;
            break;
        case RELKIND_VIEW:
@@ -4407,7 +4721,8 @@ ATSimpleRecursion(List **wqueue, Relation rel,
     */
    if (recurse &&
        (rel->rd_rel->relkind == RELKIND_RELATION ||
-        rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
+        rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
+        rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
    {
        Oid         relid = RelationGetRelid(rel);
        ListCell   *child;
@@ -4527,7 +4842,8 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation,
        att = rel->rd_att->attrs[pg_depend->objsubid - 1];
 
        if (rel->rd_rel->relkind == RELKIND_RELATION ||
-           rel->rd_rel->relkind == RELKIND_MATVIEW)
+           rel->rd_rel->relkind == RELKIND_MATVIEW ||
+           rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
        {
            if (origTypeName)
                ereport(ERROR,
@@ -4728,6 +5044,11 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
    if (recursing)
        ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
 
+   if (rel->rd_rel->relispartition && !recursing)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot add column to a partition")));
+
    attrdesc = heap_open(AttributeRelationId, RowExclusiveLock);
 
    /*
@@ -5174,6 +5495,20 @@ ATPrepAddOids(List **wqueue, Relation rel, bool recurse, AlterTableCmd *cmd, LOC
  * Return the address of the modified column.  If the column was already
  * nullable, InvalidObjectAddress is returned.
  */
+
+static void
+ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
+{
+   /* Only a partitioned table is subject to this restriction */
+   if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+       return;
+
+   /* Like check constraints, NOT NULL must leave the child tables too */
+   if (!recurse && !recursing)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                errmsg("constraint must be dropped from child tables too")));
+}
 static ObjectAddress
 ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
 {
@@ -5249,6 +5584,45 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
 
    list_free(indexoidlist);
 
+   /* If rel is partition, shouldn't drop NOT NULL if parent has the same */
+   if (rel->rd_rel->relispartition)
+   {
+       Oid         parentId = get_partition_parent(RelationGetRelid(rel));
+       Relation    parent = heap_open(parentId, AccessShareLock);
+       TupleDesc   tupDesc = RelationGetDescr(parent);
+       AttrNumber  parent_attnum;
+
+       parent_attnum = get_attnum(parentId, colName);
+       if (tupDesc->attrs[parent_attnum - 1]->attnotnull)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                    errmsg("column \"%s\" is marked NOT NULL in parent table",
+                           colName)));
+       heap_close(parent, AccessShareLock);
+   }
+
+   /* Columns in a range partition key must not lose their NOT NULL */
+   if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+   {
+       PartitionKey    key = RelationGetPartitionKey(rel);
+       int             partnatts = get_partition_natts(key),
+                       i;
+
+       if (get_partition_strategy(key) != PARTITION_STRATEGY_RANGE)
+           partnatts = 0;      /* restriction applies to range keys only */
+
+       for (i = 0; i < partnatts; i++)
+       {
+           AttrNumber  partattnum = get_partition_col_attnum(key, i);
+
+           if (partattnum == attnum)
+               ereport(ERROR,
+                       (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                        errmsg("column \"%s\" is in range partition key",
+                               colName)));
+       }
+   }
+
    /*
     * Okay, actually perform the catalog change ... if needed
     */
@@ -5281,6 +5655,21 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
  * Return the address of the modified column.  If the column was already NOT
  * NULL, InvalidObjectAddress is returned.
  */
+
+static void
+ATPrepSetNotNull(Relation rel, bool recurse, bool recursing)
+{
+   /* Only a partitioned table is subject to this restriction */
+   if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+       return;
+
+   /* Like check constraints, NOT NULL must reach the child tables too */
+   if (!recurse && !recursing)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                errmsg("constraint must be added to child tables too")));
+}
+
 static ObjectAddress
 ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
                 const char *colName, LOCKMODE lockmode)
@@ -5419,7 +5808,8 @@ ATPrepSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
    if (rel->rd_rel->relkind != RELKIND_RELATION &&
        rel->rd_rel->relkind != RELKIND_MATVIEW &&
        rel->rd_rel->relkind != RELKIND_INDEX &&
-       rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+       rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+       rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table, materialized view, index, or foreign table",
@@ -5691,6 +6081,68 @@ ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
        cmd->subtype = AT_DropColumnRecurse;
 }
 
+/*
+ * is_partition_attr
+ *     Report whether column attnum of rel takes part in rel's partition key.
+ *
+ * The answer is always false when rel is not a partitioned table.  On a true
+ * result, and if used_in_expr is not NULL, *used_in_expr tells how the first
+ * matching key entry uses the column: false when the entry names the column
+ * directly, true when the column is referenced from within a key expression.
+ * A column may appear both ways in one key; only the earliest such entry is
+ * reported, which is fine for callers that merely tailor an error message.
+ * *used_in_expr is left untouched on a false result.
+ */
+static bool
+is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr)
+{
+   PartitionKey    partkey;
+   ListCell       *next_expr;
+   int             nkeys;
+   int             keyno;
+
+   /* Nothing to look through unless rel is a partitioned table */
+   if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+       return false;
+
+   partkey = RelationGetPartitionKey(rel);
+   nkeys = get_partition_natts(partkey);
+
+   /* Expression entries are taken off this list in key order */
+   next_expr = list_head(get_partition_exprs(partkey));
+
+   for (keyno = 0; keyno < nkeys; keyno++)
+   {
+       AttrNumber  keycol = get_partition_col_attnum(partkey, keyno);
+       bool        from_expr = (keycol == 0);
+
+       if (from_expr)
+       {
+           /* Arbitrary expression: gather every attribute it references */
+           Bitmapset  *refd = NULL;
+
+           pull_varattnos((Node *) lfirst(next_expr), 1, &refd);
+           next_expr = lnext(next_expr);
+
+           if (!bms_is_member(attnum - FirstLowInvalidHeapAttributeNumber,
+                              refd))
+               continue;
+       }
+       else if (keycol != attnum)
+       {
+           /* Plain column entry, but for some other column */
+           continue;
+       }
+
+       /* Found it; say how it is used if the caller wants to know */
+       if (used_in_expr)
+           *used_in_expr = from_expr;
+       return true;
+   }
+
+   return false;
+}
+
 /*
  * Return value is the address of the dropped column.
  */
@@ -5705,6 +6157,7 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
    AttrNumber  attnum;
    List       *children;
    ObjectAddress object;
+   bool        is_expr;
 
    /* At top level, permission check was done in ATPrepCmd, else do it */
    if (recursing)
@@ -5749,6 +6202,19 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
                 errmsg("cannot drop inherited column \"%s\"",
                        colName)));
 
+   /* Don't drop columns used in the partition key */
+   if (is_partition_attr(rel, attnum, &is_expr))
+   {
+       if (!is_expr)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                    errmsg("cannot drop column named in partition key")));
+       else
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                    errmsg("cannot drop column referenced in partition key expression")));
+   }
+
    ReleaseSysCache(tuple);
 
    /*
@@ -5763,6 +6229,15 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
        Relation    attr_rel;
        ListCell   *child;
 
+       /*
+        * In case of a partitioned table, the column must be dropped from the
+        * partitions as well.
+        */
+       if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && !recurse)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                    errmsg("column must be dropped from child tables too")));
+
        attr_rel = heap_open(AttributeRelationId, RowExclusiveLock);
        foreach(child, children)
        {
@@ -6267,6 +6742,12 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel,
     * Validity checks (permission checks wait till we have the column
     * numbers)
     */
+   if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot reference partitioned table \"%s\"",
+                       RelationGetRelationName(pkrel))));
+
    if (pkrel->rd_rel->relkind != RELKIND_RELATION)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -7776,6 +8257,16 @@ ATExecDropConstraint(Relation rel, const char *constrName,
        }
    }
 
+   /*
+    * In case of a partitioned table, the constraint must be dropped from
+    * the partitions too.  There is no such thing as NO INHERIT constraints
+    * in case of partitioned tables.
+    */
+   if (!recurse && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                errmsg("constraint must be dropped from child tables too")));
+
    /*
     * Propagate to children as appropriate.  Unlike most other ALTER
     * routines, we have to do this one level of recursion at a time; we can't
@@ -7904,6 +8395,7 @@ ATPrepAlterColumnType(List **wqueue,
    NewColumnValue *newval;
    ParseState *pstate = make_parsestate(NULL);
    AclResult   aclresult;
+   bool        is_expr;
 
    if (rel->rd_rel->reloftype && !recursing)
        ereport(ERROR,
@@ -7934,6 +8426,19 @@ ATPrepAlterColumnType(List **wqueue,
                 errmsg("cannot alter inherited column \"%s\"",
                        colName)));
 
+   /* Don't alter columns used in the partition key */
+   if (is_partition_attr(rel, attnum, &is_expr))
+   {
+       if (!is_expr)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                    errmsg("cannot alter type of column named in partition key")));
+       else
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                    errmsg("cannot alter type of column referenced in partition key expression")));
+   }
+
    /* Look up the target type */
    typenameTypeIdAndMod(NULL, typeName, &targettype, &targettypmod);
 
@@ -7949,7 +8454,8 @@ ATPrepAlterColumnType(List **wqueue,
                       list_make1_oid(rel->rd_rel->reltype),
                       false);
 
-   if (tab->relkind == RELKIND_RELATION)
+   if (tab->relkind == RELKIND_RELATION ||
+       tab->relkind == RELKIND_PARTITIONED_TABLE)
    {
        /*
         * Set up an expression to transform the old data value to the new
@@ -8979,6 +9485,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock
        case RELKIND_VIEW:
        case RELKIND_MATVIEW:
        case RELKIND_FOREIGN_TABLE:
+       case RELKIND_PARTITIONED_TABLE:
            /* ok to change owner */
            break;
        case RELKIND_INDEX:
@@ -9440,6 +9947,7 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
        case RELKIND_RELATION:
        case RELKIND_TOASTVALUE:
        case RELKIND_MATVIEW:
+       case RELKIND_PARTITIONED_TABLE:
            (void) heap_reloptions(rel->rd_rel->relkind, newOptions, true);
            break;
        case RELKIND_VIEW:
@@ -9860,7 +10368,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
 
        /* Only move the object type requested */
        if ((stmt->objtype == OBJECT_TABLE &&
-            relForm->relkind != RELKIND_RELATION) ||
+            relForm->relkind != RELKIND_RELATION &&
+            relForm->relkind != RELKIND_PARTITIONED_TABLE) ||
            (stmt->objtype == OBJECT_INDEX &&
             relForm->relkind != RELKIND_INDEX) ||
            (stmt->objtype == OBJECT_MATVIEW &&
@@ -10059,6 +10568,16 @@ ATPrepAddInherit(Relation child_rel)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("cannot change inheritance of typed table")));
+
+   if (child_rel->rd_rel->relispartition)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot change inheritance of a partition")));
+
+   if (child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot change inheritance of partitioned table")));
 }
 
 /*
@@ -10067,12 +10586,7 @@ ATPrepAddInherit(Relation child_rel)
 static ObjectAddress
 ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
 {
-   Relation    parent_rel,
-               catalogRelation;
-   SysScanDesc scan;
-   ScanKeyData key;
-   HeapTuple   inheritsTuple;
-   int32       inhseqno;
+   Relation    parent_rel;
    List       *children;
    ObjectAddress address;
 
@@ -10110,18 +10624,94 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
         errmsg("cannot inherit to temporary relation of another session")));
 
+   /* Prevent partitioned tables from becoming inheritance parents */
+   if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot inherit from partitioned table \"%s\"",
+                       parent->relname)));
+
+   /* Likewise for partitions */
+   if (parent_rel->rd_rel->relispartition)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot inherit from a partition")));
+
    /*
-    * Check for duplicates in the list of parents, and determine the highest
-    * inhseqno already present; we'll use the next one for the new parent.
-    * (Note: get RowExclusiveLock because we will write pg_inherits below.)
+    * Prevent circularity by seeing if proposed parent inherits from child.
+    * (In particular, this disallows making a rel inherit from itself.)
     *
-    * Note: we do not reject the case where the child already inherits from
-    * the parent indirectly; CREATE TABLE doesn't reject comparable cases.
+    * This is not completely bulletproof because of race conditions: in
+    * multi-level inheritance trees, someone else could concurrently be
+    * making another inheritance link that closes the loop but does not join
+    * either of the rels we have locked.  Preventing that seems to require
+    * exclusive locks on the entire inheritance tree, which is a cure worse
+    * than the disease.  find_all_inheritors() will cope with circularity
+    * anyway, so don't sweat it too much.
+    *
+    * We use weakest lock we can on child's children, namely AccessShareLock.
     */
-   catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock);
-   ScanKeyInit(&key,
-               Anum_pg_inherits_inhrelid,
-               BTEqualStrategyNumber, F_OIDEQ,
+   children = find_all_inheritors(RelationGetRelid(child_rel),
+                                  AccessShareLock, NULL);
+
+   if (list_member_oid(children, RelationGetRelid(parent_rel)))
+       ereport(ERROR,
+               (errcode(ERRCODE_DUPLICATE_TABLE),
+                errmsg("circular inheritance not allowed"),
+                errdetail("\"%s\" is already a child of \"%s\".",
+                          parent->relname,
+                          RelationGetRelationName(child_rel))));
+
+   /* If parent has OIDs then child must have OIDs */
+   if (parent_rel->rd_rel->relhasoids && !child_rel->rd_rel->relhasoids)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("table \"%s\" without OIDs cannot inherit from table \"%s\" with OIDs",
+                       RelationGetRelationName(child_rel),
+                       RelationGetRelationName(parent_rel))));
+
+   /* OK to create inheritance */
+   CreateInheritance(child_rel, parent_rel);
+
+   ObjectAddressSet(address, RelationRelationId,
+                    RelationGetRelid(parent_rel));
+
+   /* keep our lock on the parent relation until commit */
+   heap_close(parent_rel, NoLock);
+
+   return address;
+}
+
+/*
+ * CreateInheritance
+ *     Catalog manipulation portion of creating inheritance between a child
+ *     table and a parent table.
+ *
+ * Common to ATExecAddInherit() and ATExecAttachPartition().
+ */
+static void
+CreateInheritance(Relation child_rel, Relation parent_rel)
+{
+   Relation    catalogRelation;
+   SysScanDesc scan;
+   ScanKeyData key;
+   HeapTuple   inheritsTuple;
+   int32       inhseqno;
+
+   /* Note: get RowExclusiveLock because we will write pg_inherits below. */
+   catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock);
+
+   /*
+    * Check for duplicates in the list of parents, and determine the highest
+    * inhseqno already present; we'll use the next one for the new parent.
+    * Also, if proposed child is a partition, it cannot already be inheriting.
+    *
+    * Note: we do not reject the case where the child already inherits from
+    * the parent indirectly; CREATE TABLE doesn't reject comparable cases.
+    */
+   ScanKeyInit(&key,
+               Anum_pg_inherits_inhrelid,
+               BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(RelationGetRelid(child_rel)));
    scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
                              true, NULL, 1, &key);
@@ -10137,44 +10727,12 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
                    (errcode(ERRCODE_DUPLICATE_TABLE),
             errmsg("relation \"%s\" would be inherited from more than once",
                    RelationGetRelationName(parent_rel))));
+
        if (inh->inhseqno > inhseqno)
            inhseqno = inh->inhseqno;
    }
    systable_endscan(scan);
 
-   /*
-    * Prevent circularity by seeing if proposed parent inherits from child.
-    * (In particular, this disallows making a rel inherit from itself.)
-    *
-    * This is not completely bulletproof because of race conditions: in
-    * multi-level inheritance trees, someone else could concurrently be
-    * making another inheritance link that closes the loop but does not join
-    * either of the rels we have locked.  Preventing that seems to require
-    * exclusive locks on the entire inheritance tree, which is a cure worse
-    * than the disease.  find_all_inheritors() will cope with circularity
-    * anyway, so don't sweat it too much.
-    *
-    * We use weakest lock we can on child's children, namely AccessShareLock.
-    */
-   children = find_all_inheritors(RelationGetRelid(child_rel),
-                                  AccessShareLock, NULL);
-
-   if (list_member_oid(children, RelationGetRelid(parent_rel)))
-       ereport(ERROR,
-               (errcode(ERRCODE_DUPLICATE_TABLE),
-                errmsg("circular inheritance not allowed"),
-                errdetail("\"%s\" is already a child of \"%s\".",
-                          parent->relname,
-                          RelationGetRelationName(child_rel))));
-
-   /* If parent has OIDs then child must have OIDs */
-   if (parent_rel->rd_rel->relhasoids && !child_rel->rd_rel->relhasoids)
-       ereport(ERROR,
-               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
-                errmsg("table \"%s\" without OIDs cannot inherit from table \"%s\" with OIDs",
-                       RelationGetRelationName(child_rel),
-                       RelationGetRelationName(parent_rel))));
-
    /* Match up the columns and bump attinhcount as needed */
    MergeAttributesIntoExisting(child_rel, parent_rel);
 
@@ -10189,16 +10747,8 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
                             inhseqno + 1,
                             catalogRelation);
 
-   ObjectAddressSet(address, RelationRelationId,
-                    RelationGetRelid(parent_rel));
-
    /* Now we're done with pg_inherits */
    heap_close(catalogRelation, RowExclusiveLock);
-
-   /* keep our lock on the parent relation until commit */
-   heap_close(parent_rel, NoLock);
-
-   return address;
 }
 
 /*
@@ -10249,7 +10799,7 @@ constraints_equivalent(HeapTuple a, HeapTuple b, TupleDesc tupleDesc)
  * Check columns in child table match up with columns in parent, and increment
  * their attinhcount.
  *
- * Called by ATExecAddInherit
+ * Called by CreateInheritance
  *
  * Currently all parent columns must be found in child. Missing columns are an
  * error.  One day we might consider creating new columns like CREATE TABLE
@@ -10267,12 +10817,17 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
    int         parent_natts;
    TupleDesc   tupleDesc;
    HeapTuple   tuple;
+   bool        child_is_partition = false;
 
    attrrel = heap_open(AttributeRelationId, RowExclusiveLock);
 
    tupleDesc = RelationGetDescr(parent_rel);
    parent_natts = tupleDesc->natts;
 
+   /* If parent_rel is a partitioned table, child_rel must be a partition */
+   if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       child_is_partition = true;
+
    for (parent_attno = 1; parent_attno <= parent_natts; parent_attno++)
    {
        Form_pg_attribute attribute = tupleDesc->attrs[parent_attno - 1];
@@ -10320,6 +10875,18 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
             * later on, this change will just roll back.)
             */
            childatt->attinhcount++;
+
+           /*
+            * In case of partitions, we must enforce that value of attislocal
+            * is same in all partitions. (Note: there are only inherited
+            * attributes in partitions)
+            */
+           if (child_is_partition)
+           {
+               Assert(childatt->attinhcount == 1);
+               childatt->attislocal = false;
+           }
+
            simple_heap_update(attrrel, &tuple->t_self, tuple);
            CatalogUpdateIndexes(attrrel, tuple);
            heap_freetuple(tuple);
@@ -10342,7 +10909,7 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
  *
  * Constraints that are marked ONLY in the parent are ignored.
  *
- * Called by ATExecAddInherit
+ * Called by CreateInheritance
  *
  * Currently all constraints in parent must be present in the child. One day we
  * may consider adding new constraints like CREATE TABLE does.
@@ -10361,10 +10928,15 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
    SysScanDesc parent_scan;
    ScanKeyData parent_key;
    HeapTuple   parent_tuple;
+   bool        child_is_partition = false;
 
    catalog_relation = heap_open(ConstraintRelationId, RowExclusiveLock);
    tuple_desc = RelationGetDescr(catalog_relation);
 
+   /* If parent_rel is a partitioned table, child_rel must be a partition */
+   if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       child_is_partition = true;
+
    /* Outer loop scans through the parent's constraint definitions */
    ScanKeyInit(&parent_key,
                Anum_pg_constraint_conrelid,
@@ -10441,6 +11013,18 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
            child_copy = heap_copytuple(child_tuple);
            child_con = (Form_pg_constraint) GETSTRUCT(child_copy);
            child_con->coninhcount++;
+
+           /*
+            * In case of partitions, an inherited constraint must be
+            * inherited only once since it cannot have multiple parents and
+            * it is never considered local.
+            */
+           if (child_is_partition)
+           {
+               Assert(child_con->coninhcount == 1);
+               child_con->conislocal = false;
+           }
+
            simple_heap_update(catalog_relation, &child_copy->t_self, child_copy);
            CatalogUpdateIndexes(catalog_relation, child_copy);
            heap_freetuple(child_copy);
@@ -10465,6 +11049,46 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
 /*
  * ALTER TABLE NO INHERIT
  *
+ * Return value is the address of the relation that is no longer parent.
+ */
+static ObjectAddress
+ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
+{
+   ObjectAddress   address;
+   Relation        parent_rel;
+
+   if (rel->rd_rel->relispartition)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot change inheritance of a partition")));
+
+   /*
+    * AccessShareLock on the parent is probably enough, seeing that DROP
+    * TABLE doesn't lock parent tables at all.  We need some lock since we'll
+    * be inspecting the parent's schema.
+    */
+   parent_rel = heap_openrv(parent, AccessShareLock);
+
+   /*
+    * We don't bother to check ownership of the parent table --- ownership of
+    * the child is presumed enough rights.
+    */
+
+   /* Off to RemoveInheritance() where most of the work happens */
+   RemoveInheritance(rel, parent_rel);
+
+   /* keep our lock on the parent relation until commit */
+   heap_close(parent_rel, NoLock);
+
+   ObjectAddressSet(address, RelationRelationId,
+                    RelationGetRelid(parent_rel));
+
+   return address;
+}
+
+/*
+ * RemoveInheritance
+ *
  * Drop a parent from the child's parents. This just adjusts the attinhcount
  * and attislocal of the columns and removes the pg_inherit and pg_depend
  * entries.
@@ -10478,13 +11102,11 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
  * coninhcount and conislocal for inherited constraints are adjusted in
  * exactly the same way.
  *
- * Return value is the address of the relation that is no longer parent.
+ * Common to ATExecDropInherit() and ATExecDetachPartition().
  */
-static ObjectAddress
-ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
+static void
+RemoveInheritance(Relation child_rel, Relation parent_rel)
 {
-   Relation    parent_rel;
-   Oid         parent_oid;
    Relation    catalogRelation;
    SysScanDesc scan;
    ScanKeyData key[3];
@@ -10493,19 +11115,11 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
                constraintTuple;
    List       *connames;
    bool        found = false;
-   ObjectAddress address;
+   bool        child_is_partition = false;
 
-   /*
-    * AccessShareLock on the parent is probably enough, seeing that DROP
-    * TABLE doesn't lock parent tables at all.  We need some lock since we'll
-    * be inspecting the parent's schema.
-    */
-   parent_rel = heap_openrv(parent, AccessShareLock);
-
-   /*
-    * We don't bother to check ownership of the parent table --- ownership of
-    * the child is presumed enough rights.
-    */
+   /* If parent_rel is a partitioned table, child_rel must be a partition */
+   if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       child_is_partition = true;
 
    /*
     * Find and destroy the pg_inherits entry linking the two, or error out if
@@ -10515,7 +11129,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
    ScanKeyInit(&key[0],
                Anum_pg_inherits_inhrelid,
                BTEqualStrategyNumber, F_OIDEQ,
-               ObjectIdGetDatum(RelationGetRelid(rel)));
+               ObjectIdGetDatum(RelationGetRelid(child_rel)));
    scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
                              true, NULL, 1, key);
 
@@ -10536,11 +11150,20 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
    heap_close(catalogRelation, RowExclusiveLock);
 
    if (!found)
-       ereport(ERROR,
-               (errcode(ERRCODE_UNDEFINED_TABLE),
-                errmsg("relation \"%s\" is not a parent of relation \"%s\"",
-                       RelationGetRelationName(parent_rel),
-                       RelationGetRelationName(rel))));
+   {
+       if (child_is_partition)
+           ereport(ERROR,
+                   (errcode(ERRCODE_UNDEFINED_TABLE),
+                    errmsg("relation \"%s\" is not a partition of relation \"%s\"",
+                           RelationGetRelationName(child_rel),
+                           RelationGetRelationName(parent_rel))));
+       else
+           ereport(ERROR,
+                   (errcode(ERRCODE_UNDEFINED_TABLE),
+                    errmsg("relation \"%s\" is not a parent of relation \"%s\"",
+                           RelationGetRelationName(parent_rel),
+                           RelationGetRelationName(child_rel))));
+   }
 
    /*
     * Search through child columns looking for ones matching parent rel
@@ -10549,7 +11172,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
    ScanKeyInit(&key[0],
                Anum_pg_attribute_attrelid,
                BTEqualStrategyNumber, F_OIDEQ,
-               ObjectIdGetDatum(RelationGetRelid(rel)));
+               ObjectIdGetDatum(RelationGetRelid(child_rel)));
    scan = systable_beginscan(catalogRelation, AttributeRelidNumIndexId,
                              true, NULL, 1, key);
    while (HeapTupleIsValid(attributeTuple = systable_getnext(scan)))
@@ -10611,7 +11234,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
    ScanKeyInit(&key[0],
                Anum_pg_constraint_conrelid,
                BTEqualStrategyNumber, F_OIDEQ,
-               ObjectIdGetDatum(RelationGetRelid(rel)));
+               ObjectIdGetDatum(RelationGetRelid(child_rel)));
    scan = systable_beginscan(catalogRelation, ConstraintRelidIndexId,
                              true, NULL, 1, key);
 
@@ -10642,7 +11265,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
 
            if (copy_con->coninhcount <= 0)     /* shouldn't happen */
                elog(ERROR, "relation %u has non-inherited constraint \"%s\"",
-                    RelationGetRelid(rel), NameStr(copy_con->conname));
+                    RelationGetRelid(child_rel), NameStr(copy_con->conname));
 
            copy_con->coninhcount--;
            if (copy_con->coninhcount == 0)
@@ -10654,30 +11277,20 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
        }
    }
 
-   parent_oid = RelationGetRelid(parent_rel);
-
    systable_endscan(scan);
    heap_close(catalogRelation, RowExclusiveLock);
 
-   drop_parent_dependency(RelationGetRelid(rel),
+   drop_parent_dependency(RelationGetRelid(child_rel),
                           RelationRelationId,
                           RelationGetRelid(parent_rel));
-
    /*
     * Post alter hook of this inherits. Since object_access_hook doesn't take
     * multiple object identifiers, we relay oid of parent relation using
     * auxiliary_id argument.
     */
    InvokeObjectPostAlterHookArg(InheritsRelationId,
-                                RelationGetRelid(rel), 0,
+                                RelationGetRelid(child_rel), 0,
                                 RelationGetRelid(parent_rel), false);
-
-   /* keep our lock on the parent relation until commit */
-   heap_close(parent_rel, NoLock);
-
-   ObjectAddressSet(address, RelationRelationId, parent_oid);
-
-   return address;
 }
 
 /*
@@ -11499,7 +12112,8 @@ AlterTableNamespaceInternal(Relation rel, Oid oldNspOid, Oid nspOid,
 
    /* Fix other dependent stuff */
    if (rel->rd_rel->relkind == RELKIND_RELATION ||
-       rel->rd_rel->relkind == RELKIND_MATVIEW)
+       rel->rd_rel->relkind == RELKIND_MATVIEW ||
+       rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        AlterIndexNamespaces(classRel, rel, oldNspOid, nspOid, objsMoved);
        AlterSeqNamespaces(classRel, rel, oldNspOid, nspOid,
@@ -11948,7 +12562,7 @@ RangeVarCallbackOwnsTable(const RangeVar *relation,
    if (!relkind)
        return;
    if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE &&
-       relkind != RELKIND_MATVIEW)
+       relkind != RELKIND_MATVIEW && relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table or materialized view", relation->relname)));
@@ -12105,7 +12719,8 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
        relkind != RELKIND_VIEW &&
        relkind != RELKIND_MATVIEW &&
        relkind != RELKIND_SEQUENCE &&
-       relkind != RELKIND_FOREIGN_TABLE)
+       relkind != RELKIND_FOREIGN_TABLE &&
+       relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table, view, materialized view, sequence, or foreign table",
@@ -12113,3 +12728,701 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
 
    ReleaseSysCache(tuple);
 }
+
+/*
+ * Transform any expressions present in the partition key
+ *
+ * rel is the relation the key is being defined on; it is installed as the
+ * sole rangetable entry of a dummy ParseState so that column references in
+ * key expressions can be resolved.  partspec is the raw specification and
+ * is left unmodified.  *strategy is set to the PARTITION_STRATEGY_* code
+ * matching partspec->strategy.
+ *
+ * Returns a new PartitionSpec whose partParams list holds the elements of
+ * partspec->partParams, with any expressions parse-transformed and their
+ * collations assigned.  Errors out on an unrecognized strategy name or on a
+ * column name that appears more than once in the key.
+ */
+static PartitionSpec *
+transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy)
+{
+   PartitionSpec  *newspec;
+   ParseState     *pstate;
+   RangeTblEntry  *rte;
+   ListCell       *l;
+
+   newspec = (PartitionSpec *) makeNode(PartitionSpec);
+
+   newspec->strategy = partspec->strategy;
+   newspec->location = partspec->location;
+   newspec->partParams = NIL;
+
+   /* Parse partitioning strategy name */
+   if (!pg_strcasecmp(partspec->strategy, "list"))
+       *strategy = PARTITION_STRATEGY_LIST;
+   else if (!pg_strcasecmp(partspec->strategy, "range"))
+       *strategy = PARTITION_STRATEGY_RANGE;
+   else
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("unrecognized partitioning strategy \"%s\"",
+                       partspec->strategy)));
+
+   /*
+    * Create a dummy ParseState and insert the target relation as its sole
+    * rangetable entry.  We need a ParseState for transformExpr.
+    */
+   pstate = make_parsestate(NULL);
+   rte = addRangeTableEntryForRelation(pstate, rel, NULL, false, true);
+   addRTEtoQuery(pstate, rte, true, true, true);
+
+   /* take care of any partition expressions */
+   foreach(l, partspec->partParams)
+   {
+       ListCell       *lc;
+       PartitionElem  *pelem = (PartitionElem *) lfirst(l);
+
+       /* Check for PARTITION BY ... (foo, foo) */
+       foreach(lc, newspec->partParams)
+       {
+           PartitionElem   *pparam = (PartitionElem *) lfirst(lc);
+
+           if (pelem->name && pparam->name &&
+                   !strcmp(pelem->name, pparam->name))
+               ereport(ERROR,
+                       (errcode(ERRCODE_DUPLICATE_COLUMN),
+                        errmsg("column \"%s\" appears more than once in partition key",
+                               pelem->name),
+                        parser_errposition(pstate, pelem->location)));
+       }
+
+       if (pelem->expr)
+       {
+           /*
+            * Copy, to avoid scribbling on the input: the caller's parse
+            * tree must still hold the untransformed expression afterwards.
+            */
+           pelem = (PartitionElem *) copyObject(pelem);
+
+           /* Now do parse transformation of the expression */
+           pelem->expr = transformExpr(pstate, pelem->expr,
+                                       EXPR_KIND_PARTITION_EXPRESSION);
+
+           /* we have to fix its collations too */
+           assign_expr_collations(pstate, pelem->expr);
+       }
+
+       newspec->partParams = lappend(newspec->partParams, pelem);
+   }
+
+   return newspec;
+}
+
+/*
+ * Compute per-partition-column information from a list of PartitionElem's
+ *
+ * For the N'th element of partParams (counting from zero) this fills in:
+ *   partattrs[N]      attribute number of the key column, or 0 if the key
+ *                     column is an expression
+ *   partcollation[N]  collation of the column or expression, overridden by
+ *                     the element's COLLATE clause if it has one
+ *   partopclass[N]    btree operator class: the one named in the element,
+ *                     else the default btree opclass for the key's type
+ * Key expressions are appended to *partexprs in key order.  The output
+ * arrays are indexed up to list_length(partParams) - 1, so callers must
+ * size them accordingly.
+ *
+ * Expression elements are expected to have been parse-transformed already
+ * (see the note about transformPartitionSpec() below).
+ *
+ * Errors out for nonexistent columns, system columns (whether named
+ * directly or referenced inside an expression), whole-row references,
+ * mutable functions, constant expressions, collation mismatches, and a
+ * missing default btree operator class.
+ */
+static void
+ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs,
+                     List **partexprs, Oid *partopclass, Oid *partcollation)
+{
+   int         attn;
+   ListCell   *lc;
+
+   attn = 0;
+   foreach(lc, partParams)
+   {
+       PartitionElem  *pelem = (PartitionElem *) lfirst(lc);
+       Oid     atttype;
+       Oid     attcollation;
+
+       if (pelem->name != NULL)
+       {
+           /* Simple attribute reference */
+           HeapTuple   atttuple;
+           Form_pg_attribute attform;
+
+           atttuple = SearchSysCacheAttName(RelationGetRelid(rel), pelem->name);
+           if (!HeapTupleIsValid(atttuple))
+               ereport(ERROR,
+                       (errcode(ERRCODE_UNDEFINED_COLUMN),
+                        errmsg("column \"%s\" named in partition key does not exist",
+                               pelem->name)));
+           attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+           if (attform->attnum <= 0)
+               ereport(ERROR,
+                       (errcode(ERRCODE_UNDEFINED_COLUMN),
+                        errmsg("cannot use system column \"%s\" in partition key",
+                               pelem->name)));
+
+           partattrs[attn] = attform->attnum;
+           atttype = attform->atttypid;
+           attcollation = attform->attcollation;
+           ReleaseSysCache(atttuple);
+
+           /* Note that whole-row references can't happen here; see below */
+       }
+       else
+       {
+           /* Expression */
+           Node       *expr = pelem->expr;
+
+           Assert(expr != NULL);
+           atttype = exprType(expr);
+           attcollation = exprCollation(expr);
+
+           /*
+            * Strip any top-level COLLATE clause.  This ensures that we treat
+            * "x COLLATE y" and "(x COLLATE y)" alike.
+            */
+           while (IsA(expr, CollateExpr))
+               expr = (Node *) ((CollateExpr *) expr)->arg;
+
+           if (IsA(expr, Var) &&
+               ((Var *) expr)->varattno > 0)
+           {
+               /*
+                * User wrote "(column)" or "(column COLLATE something)".
+                * Treat it like simple attribute anyway.  Only user columns
+                * qualify: system-column and whole-row Vars fall through to
+                * the expression checks below, which reject them.
+                */
+               partattrs[attn] = ((Var *) expr)->varattno;
+           }
+           else
+           {
+               Bitmapset   *expr_attrs = NULL;
+               int         sysattno;
+
+               partattrs[attn] = 0;    /* marks the column as expression */
+               *partexprs = lappend(*partexprs, expr);
+
+               /*
+                * Note that expression_planner does not change the passed in
+                * expression destructively and we have already saved the
+                * expression to be stored into the catalog above.
+                */
+               expr = (Node *) expression_planner((Expr *) expr);
+
+               /*
+                * Partition expression cannot contain mutable functions,
+                * because a given row must always map to the same partition
+                * as long as there is no change in the partition boundary
+                * structure.
+                */
+               if (contain_mutable_functions(expr))
+                   ereport(ERROR,
+                           (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                            errmsg("functions in partition key expression must be marked IMMUTABLE")));
+
+               /*
+                * While it is not exactly *wrong* for an expression to be
+                * a constant value, it seems better to prevent such input.
+                */
+               if (IsA(expr, Const))
+                   ereport(ERROR,
+                           (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                            errmsg("cannot use constant expression as partition key")));
+
+               /*
+                * transformPartitionSpec() should have already rejected subqueries,
+                * aggregates, window functions, and SRFs, based on the EXPR_KIND_
+                * for partition expressions.
+                */
+
+               /* Cannot have expressions containing whole-row references */
+               pull_varattnos(expr, 1, &expr_attrs);
+               if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber,
+                                 expr_attrs))
+                   ereport(ERROR,
+                           (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                            errmsg("partition key expressions cannot contain whole-row references")));
+
+               /*
+                * Nor system column references: the named-column case above
+                * rejects system columns, so the expression case must not
+                * let them in through the back door.  System attributes have
+                * negative attribute numbers, which pull_varattnos() offsets
+                * by FirstLowInvalidHeapAttributeNumber.
+                */
+               for (sysattno = FirstLowInvalidHeapAttributeNumber + 1;
+                    sysattno < 0;
+                    sysattno++)
+               {
+                   if (bms_is_member(sysattno - FirstLowInvalidHeapAttributeNumber,
+                                     expr_attrs))
+                       ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                errmsg("partition key expressions cannot contain system column references")));
+               }
+           }
+       }
+
+       /*
+        * Apply collation override if any
+        */
+       if (pelem->collation)
+           attcollation = get_collation_oid(pelem->collation, false);
+
+       /*
+        * Check we have a collation iff it's a collatable type.  The only
+        * expected failures here are (1) COLLATE applied to a noncollatable
+        * type, or (2) partition expression had an unresolved collation.
+        * But we might as well code this to be a complete consistency check.
+        */
+       if (type_is_collatable(atttype))
+       {
+           if (!OidIsValid(attcollation))
+               ereport(ERROR,
+                       (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                        errmsg("could not determine which collation to use for partition expression"),
+                        errhint("Use the COLLATE clause to set the collation explicitly.")));
+       }
+       else
+       {
+           if (OidIsValid(attcollation))
+               ereport(ERROR,
+                       (errcode(ERRCODE_DATATYPE_MISMATCH),
+                        errmsg("collations are not supported by type %s",
+                               format_type_be(atttype))));
+       }
+
+       partcollation[attn] = attcollation;
+
+       /*
+        * Identify a btree opclass to use. Currently, we use only btree
+        * operators, which seems enough for list and range partitioning.
+        */
+       if (!pelem->opclass)
+       {
+           partopclass[attn] = GetDefaultOpClass(atttype, BTREE_AM_OID);
+
+           if (!OidIsValid(partopclass[attn]))
+               ereport(ERROR,
+                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                        errmsg("data type %s has no default btree operator class",
+                               format_type_be(atttype)),
+                        errhint("You must specify a btree operator class or define a default btree operator class for the data type.")));
+       }
+       else
+           partopclass[attn] = ResolveOpClass(pelem->opclass,
+                                              atttype,
+                                              "btree",
+                                              BTREE_AM_OID);
+
+       attn++;
+   }
+}
+
+/*
+ * ALTER TABLE <name> ATTACH PARTITION <partition-name> FOR VALUES
+ *
+ * Return the address of the newly attached partition.
+ */
+static ObjectAddress
+ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd)
+{
+   PartitionKey    key = RelationGetPartitionKey(rel);
+   Relation    attachRel,
+               catalog;
+   List       *childrels;
+   TupleConstr *attachRel_constr;
+   List       *partConstraint,
+              *existConstraint;
+   SysScanDesc scan;
+   ScanKeyData skey;
+   HeapTuple   tuple;
+   AttrNumber  attno;
+   int         natts;
+   TupleDesc   tupleDesc;
+   bool        skip_validate = false;
+   ObjectAddress address;
+
+   attachRel = heap_openrv(cmd->name, AccessExclusiveLock);
+
+   /*
+    * Must be owner of both parent and source table -- parent was checked by
+    * ATSimplePermissions call in ATPrepCmd
+    */
+   ATSimplePermissions(attachRel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+   /* A partition can only have one parent */
+   if (attachRel->rd_rel->relispartition)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("\"%s\" is already a partition",
+                       RelationGetRelationName(attachRel))));
+
+   if (attachRel->rd_rel->reloftype)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot attach a typed table as partition")));
+
+   /*
+    * Table being attached should not already be part of inheritance; either
+    * as a child table...
+    */
+   catalog = heap_open(InheritsRelationId, AccessShareLock);
+   ScanKeyInit(&skey,
+               Anum_pg_inherits_inhrelid,
+               BTEqualStrategyNumber, F_OIDEQ,
+               ObjectIdGetDatum(RelationGetRelid(attachRel)));
+   scan = systable_beginscan(catalog, InheritsRelidSeqnoIndexId, true,
+                             NULL, 1, &skey);
+   if (HeapTupleIsValid(systable_getnext(scan)))
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot attach inheritance child as partition")));
+   systable_endscan(scan);
+
+   /* ...or as a parent table (except the case when it is partitioned) */
+   ScanKeyInit(&skey,
+               Anum_pg_inherits_inhparent,
+               BTEqualStrategyNumber, F_OIDEQ,
+               ObjectIdGetDatum(RelationGetRelid(attachRel)));
+   scan = systable_beginscan(catalog, InheritsParentIndexId, true, NULL,
+                             1, &skey);
+   if (HeapTupleIsValid(systable_getnext(scan)) &&
+       attachRel->rd_rel->relkind == RELKIND_RELATION)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot attach inheritance parent as partition")));
+   systable_endscan(scan);
+   heap_close(catalog, AccessShareLock);
+
+   /*
+    * Prevent circularity by seeing if rel is a partition of attachRel.
+    * (In particular, this disallows making a rel a partition of itself.)
+    */
+   childrels = find_all_inheritors(RelationGetRelid(attachRel),
+                                   AccessShareLock, NULL);
+   if (list_member_oid(childrels, RelationGetRelid(rel)))
+       ereport(ERROR,
+               (errcode(ERRCODE_DUPLICATE_TABLE),
+                errmsg("circular inheritance not allowed"),
+                errdetail("\"%s\" is already a child of \"%s\".",
+                          RelationGetRelationName(rel),
+                          RelationGetRelationName(attachRel))));
+
+   /* Temp parent cannot have a partition that is itself not a temp */
+   if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+       attachRel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot attach a permanent relation as partition of temporary relation \"%s\"",
+                       RelationGetRelationName(rel))));
+
+   /* If the parent is temp, it must belong to this session */
+   if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+       !rel->rd_islocaltemp)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+       errmsg("cannot attach as partition of temporary relation of another session")));
+
+   /* Ditto for the partition */
+   if (attachRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+       !attachRel->rd_islocaltemp)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+        errmsg("cannot attach temporary relation of another session as partition")));
+
+   /* If parent has OIDs then child must have OIDs */
+   if (rel->rd_rel->relhasoids && !attachRel->rd_rel->relhasoids)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot attach table \"%s\" without OIDs as partition of"
+                       " table \"%s\" with OIDs", RelationGetRelationName(attachRel),
+                       RelationGetRelationName(rel))));
+
+   /* OTOH, if parent doesn't have them, do not allow in attachRel either */
+   if (attachRel->rd_rel->relhasoids && !rel->rd_rel->relhasoids)
+       ereport(ERROR,
+               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                errmsg("cannot attach table \"%s\" with OIDs as partition of table"
+                       " \"%s\" without OIDs", RelationGetRelationName(attachRel),
+                       RelationGetRelationName(rel))));
+
+   /* Check if there are any columns in attachRel that aren't in the parent */
+   tupleDesc = RelationGetDescr(attachRel);
+   natts = tupleDesc->natts;
+   for (attno = 1; attno <= natts; attno++)
+   {
+       Form_pg_attribute attribute = tupleDesc->attrs[attno - 1];
+       char       *attributeName = NameStr(attribute->attname);
+
+       /* Ignore dropped */
+       if (attribute->attisdropped)
+           continue;
+
+       /* Find same column in parent (matching on column name). */
+       tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), attributeName);
+       if (!HeapTupleIsValid(tuple))
+           ereport(ERROR,
+                   (errcode(ERRCODE_DATATYPE_MISMATCH),
+                    errmsg("table \"%s\" contains column \"%s\" not found in parent \"%s\"",
+                           RelationGetRelationName(attachRel), attributeName,
+                           RelationGetRelationName(rel)),
+                    errdetail("New partition should contain only the columns present in parent.")));
+   }
+
+   /* OK to create inheritance.  Rest of the checks performed there */
+   CreateInheritance(attachRel, rel);
+
+   /*
+    * Check that the new partition's bound is valid and does not overlap any
+    * of existing partitions of the parent - note that it does not return
+    * on error.
+    */
+   check_new_partition_bound(RelationGetRelationName(attachRel), rel,
+                             cmd->bound);
+
+   /* Update the pg_class entry. */
+   StorePartitionBound(attachRel, cmd->bound);
+
+   /*
+    * Generate partition constraint from the partition bound specification.
+    * If the parent itself is a partition, make sure to include its
+    * constraint as well.
+    */
+   partConstraint = list_concat(get_qual_from_partbound(attachRel, rel,
+                                                        cmd->bound),
+                                RelationGetPartitionQual(rel, true));
+   partConstraint = (List *) eval_const_expressions(NULL,
+                                                    (Node *) partConstraint);
+   partConstraint = (List *) canonicalize_qual((Expr *) partConstraint);
+   partConstraint = list_make1(make_ands_explicit(partConstraint));
+
+   /*
+    * Check if we can do away with having to scan the table being attached
+    * to validate the partition constraint, by *proving* that the existing
+    * constraints of the table *imply* the partition predicate.  We include
+    * the table's check constraints and NOT NULL constraints in the list of
+    * clauses passed to predicate_implied_by().
+    *
+    * There is a case in which we cannot rely on just the result of the
+    * proof.
+    */
+   tupleDesc = RelationGetDescr(attachRel);
+   attachRel_constr = tupleDesc->constr;
+   existConstraint = NIL;
+   if (attachRel_constr > 0)
+   {
+       int         num_check = attachRel_constr->num_check;
+       int         i;
+       Bitmapset  *not_null_attrs = NULL;
+       List       *part_constr;
+       ListCell   *lc;
+       bool        partition_accepts_null = true;
+       int         partnatts;
+
+       if (attachRel_constr->has_not_null)
+       {
+           int         natts = attachRel->rd_att->natts;
+
+           for (i = 1; i <= natts; i++)
+           {
+               Form_pg_attribute att = attachRel->rd_att->attrs[i - 1];
+
+               if (att->attnotnull && !att->attisdropped)
+               {
+                   NullTest   *ntest = makeNode(NullTest);
+
+                   ntest->arg = (Expr *) makeVar(1,
+                                                 i,
+                                                 att->atttypid,
+                                                 att->atttypmod,
+                                                 att->attcollation,
+                                                 0);
+                   ntest->nulltesttype = IS_NOT_NULL;
+
+                   /*
+                    * argisrow=false is correct even for a composite column,
+                    * because attnotnull does not represent a SQL-spec IS NOT
+                    * NULL test in such a case, just IS DISTINCT FROM NULL.
+                    */
+                   ntest->argisrow = false;
+                   ntest->location = -1;
+                   existConstraint = lappend(existConstraint, ntest);
+                   not_null_attrs = bms_add_member(not_null_attrs, i);
+               }
+           }
+       }
+
+       for (i = 0; i < num_check; i++)
+       {
+           Node       *cexpr;
+
+           /*
+            * If this constraint hasn't been fully validated yet, we must
+            * ignore it here.
+            */
+           if (!attachRel_constr->check[i].ccvalid)
+               continue;
+
+           cexpr = stringToNode(attachRel_constr->check[i].ccbin);
+
+           /*
+            * Run each expression through const-simplification and
+            * canonicalization.  It is necessary, because we will be
+            * comparing it to similarly-processed qual clauses, and may fail
+            * to detect valid matches without this.
+            */
+           cexpr = eval_const_expressions(NULL, cexpr);
+           cexpr = (Node *) canonicalize_qual((Expr *) cexpr);
+
+           existConstraint = list_concat(existConstraint,
+                                         make_ands_implicit((Expr *) cexpr));
+       }
+
+       existConstraint = list_make1(make_ands_explicit(existConstraint));
+
+       /* And away we go ... */
+       if (predicate_implied_by(partConstraint, existConstraint))
+           skip_validate = true;
+
+       /*
+        * We choose to err on the safer side, ie, give up on skipping the
+        * validation scan, if the partition key column doesn't have
+        * the NOT NULL constraint and the table is to become a list partition
+        * that does not accept nulls.  In this case, the partition predicate
+        * (partConstraint) does include an 'key IS NOT NULL' expression,
+        * however, because of the way predicate_implied_by_simple_clause()
+        * is designed to handle IS NOT NULL predicates in the absence of a
+        * IS NOT NULL clause, we cannot rely on just the above proof.
+        *
+        * That is not an issue in case of a range partition, because if there
+        * were no NOT NULL constraint defined on the key columns, an error
+        * would be thrown before we get here anyway.  That is not true,
+        * however, if any of the partition keys is an expression, which is
+        * handled below.
+        */
+       part_constr = linitial(partConstraint);
+       part_constr = make_ands_implicit((Expr *) part_constr);
+
+       /*
+        * part_constr contains an IS NOT NULL expression, if this is a list
+        * partition that does not accept nulls (in fact, also if this is a
+        * range partition and some partition key is an expression, but we
+        * never skip validation in that case anyway; see below)
+        */
+       foreach(lc, part_constr)
+       {
+           Node *expr = lfirst(lc);
+
+           if (IsA(expr, NullTest) &&
+               ((NullTest *) expr)->nulltesttype == IS_NOT_NULL)
+           {
+               partition_accepts_null = false;
+               break;
+           }
+       }
+
+       partnatts = get_partition_natts(key);
+       for (i = 0; i < partnatts; i++)
+       {
+           AttrNumber  partattno;
+
+           partattno = get_partition_col_attnum(key, i);
+
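+           /*
+            * If the partition does not accept nulls, we must not skip
+            * validation when this partition key is an expression
+            * (partattno == 0) or a column that is not in not_null_attrs.
+            */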
+           if (!partition_accepts_null &&
+               (partattno == 0 ||
+                !bms_is_member(partattno, not_null_attrs)))
+               skip_validate = false;
+       }
+   }
+
+   if (skip_validate)
+       elog(NOTICE, "skipping scan to validate partition constraint");
+
+   /*
+    * Set up to have the table scanned to validate the partition
+    * constraint (see partConstraint above).  If it's a partitioned table,
+    * we schedule its leaf partitions to be scanned instead.
+    */
+   if (!skip_validate)
+   {
+       List       *all_parts;
+       ListCell   *lc;
+
+       /* Take an exclusive lock on the partitions to be checked */
+       if (attachRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+           all_parts = find_all_inheritors(RelationGetRelid(attachRel),
+                                            AccessExclusiveLock, NULL);
+       else
+           all_parts = list_make1_oid(RelationGetRelid(attachRel));
+
+       foreach(lc, all_parts)
+       {
+           AlteredTableInfo *tab;
+           Oid         part_relid = lfirst_oid(lc);
+           Relation    part_rel;
+           Expr       *constr;
+
+           /* Lock already taken */
+           if (part_relid != RelationGetRelid(attachRel))
+               part_rel = heap_open(part_relid, NoLock);
+           else
+               part_rel = attachRel;
+
+           /*
+            * Skip if it's a partitioned table.  Only RELKIND_RELATION
+            * relations (ie, leaf partitions) need to be scanned.
+            */
+           if (part_rel != attachRel &&
+               part_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+           {
+               heap_close(part_rel, NoLock);
+               continue;
+           }
+
+           /* Grab a work queue entry */
+           tab = ATGetQueueEntry(wqueue, part_rel);
+
+           constr = linitial(partConstraint);
+           tab->partition_constraint = make_ands_implicit((Expr *) constr);
+
+           /* keep our lock until commit */
+           if (part_rel != attachRel)
+               heap_close(part_rel, NoLock);
+       }
+   }
+
+   /*
+    * Invalidate the relcache so that the new partition is now included
+    * in rel's partition descriptor.
+    */
+   CacheInvalidateRelcache(rel);
+
+   ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachRel));
+
+   /* keep our lock until commit */
+   heap_close(attachRel, NoLock);
+
+   return address;
+}
+
+/*
+ * ALTER TABLE DETACH PARTITION
+ *
+ * Opens the relation named by 'name', removes its inheritance link to 'rel'
+ * via RemoveInheritance(), and then updates the partition's pg_class row so
+ * that relpartbound is NULL and relispartition is false.  Finally the
+ * relcache entry of 'rel' is invalidated.
+ *
+ * Return the address of the relation that is no longer a partition of rel.
+ */
+static ObjectAddress
+ATExecDetachPartition(Relation rel, RangeVar *name)
+{
+   Relation    partRel,
+               classRel;
+   HeapTuple   tuple,
+               newtuple;
+   Datum       new_val[Natts_pg_class];
+   bool        isnull,
+               new_null[Natts_pg_class],
+               new_repl[Natts_pg_class];
+   ObjectAddress address;
+
+   partRel = heap_openrv(name, AccessShareLock);
+
+   /* All inheritance related checks are performed within the function */
+   RemoveInheritance(partRel, rel);
+
+   /* Update pg_class tuple */
+   classRel = heap_open(RelationRelationId, RowExclusiveLock);
+   tuple = SearchSysCacheCopy1(RELOID,
+                               ObjectIdGetDatum(RelationGetRelid(partRel)));
+
+   /*
+    * Fail cleanly if the lookup came back empty.  The Asserts below would
+    * otherwise dereference an invalid tuple, and they are compiled out of
+    * non-assert builds anyway.
+    */
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "cache lookup failed for relation %u",
+            RelationGetRelid(partRel));
+   Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition);
+
+   /* Fetched only to assert that a partition bound is currently stored */
+   (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound,
+                          &isnull);
+   Assert(!isnull);
+
+   /* Clear relpartbound and reset relispartition */
+   memset(new_val, 0, sizeof(new_val));
+   memset(new_null, false, sizeof(new_null));
+   memset(new_repl, false, sizeof(new_repl));
+   /* Only the relpartbound column is marked for replacement (with NULL) */
+   new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0;
+   new_null[Anum_pg_class_relpartbound - 1] = true;
+   new_repl[Anum_pg_class_relpartbound - 1] = true;
+   newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+                                new_val, new_null, new_repl);
+
+   /* relispartition is flipped directly in the modified tuple's struct */
+   ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false;
+   simple_heap_update(classRel, &newtuple->t_self, newtuple);
+   CatalogUpdateIndexes(classRel, newtuple);
+   heap_freetuple(newtuple);
+   heap_close(classRel, RowExclusiveLock);
+
+   /*
+    * Invalidate the relcache so that the partition is no longer included
+    * in our partition descriptor.
+    */
+   CacheInvalidateRelcache(rel);
+
+   ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
+
+   /* keep our lock until commit */
+   heap_close(partRel, NoLock);
+
+   return address;
+}
index 1c264b773613819ff3f0cc638d47d8a93deb0cd9..02e9693f28f1d58b66c383c5d6e11757520bd99a 100644 (file)
@@ -176,7 +176,8 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
     * Triggers must be on tables or views, and there are additional
     * relation-type-specific restrictions.
     */
-   if (rel->rd_rel->relkind == RELKIND_RELATION)
+   if (rel->rd_rel->relkind == RELKIND_RELATION ||
+       rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        /* Tables can't have INSTEAD OF triggers */
        if (stmt->timing != TRIGGER_TYPE_BEFORE &&
@@ -186,6 +187,13 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
                     errmsg("\"%s\" is a table",
                            RelationGetRelationName(rel)),
                     errdetail("Tables cannot have INSTEAD OF triggers.")));
+       /* Disallow ROW triggers on partitioned tables */
+       if (stmt->row && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+           ereport(ERROR,
+                   (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                    errmsg("\"%s\" is a partitioned table",
+                           RelationGetRelationName(rel)),
+                    errdetail("Partitioned tables cannot have ROW triggers.")));
    }
    else if (rel->rd_rel->relkind == RELKIND_VIEW)
    {
@@ -1211,7 +1219,8 @@ RemoveTriggerById(Oid trigOid)
 
    if (rel->rd_rel->relkind != RELKIND_RELATION &&
        rel->rd_rel->relkind != RELKIND_VIEW &&
-       rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+       rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+       rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table, view, or foreign table",
@@ -1316,7 +1325,8 @@ RangeVarCallbackForRenameTrigger(const RangeVar *rv, Oid relid, Oid oldrelid,
 
    /* only tables and views can have triggers */
    if (form->relkind != RELKIND_RELATION && form->relkind != RELKIND_VIEW &&
-       form->relkind != RELKIND_FOREIGN_TABLE)
+       form->relkind != RELKIND_FOREIGN_TABLE &&
+       form->relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table, view, or foreign table",
index 056933a5845c73f177a8ccd0ef3dee8cb00033a6..5e3989acd225b14ad2a5518e8ad6e4ab64ea2387 100644 (file)
@@ -2107,7 +2107,8 @@ DefineCompositeType(RangeVar *typevar, List *coldeflist)
    /*
     * Finally create the relation.  This also creates the type.
     */
-   DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address);
+   DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address,
+                  NULL);
 
    return address;
 }
index 58bbf5548bc9c7336392c807f22ede81d0376380..b1be2f7ad5927e15f7c44811fcfa307834440536 100644 (file)
@@ -1314,7 +1314,8 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
     */
    if (onerel->rd_rel->relkind != RELKIND_RELATION &&
        onerel->rd_rel->relkind != RELKIND_MATVIEW &&
-       onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
+       onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
+       onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
    {
        ereport(WARNING,
                (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
index 325a81096fb6da84760434049cb84689d51e2ee5..c6b0e4f2b3ca0a3e9cf8a176177b95f0206f3b41 100644 (file)
@@ -228,7 +228,8 @@ DefineVirtualRelation(RangeVar *relation, List *tlist, bool replace,
         * existing view, so we don't need more code to complain if "replace"
         * is false).
         */
-       address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL);
+       address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL,
+                                NULL);
        Assert(address.objectId != InvalidOid);
        return address;
    }
index 71c07288a19082d790411a5d2369035ad461cdac..0f47c7e01041da11021a170c1dd6ae30765b32e5 100644 (file)
@@ -42,6 +42,7 @@
 #include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/namespace.h"
+#include "catalog/partition.h"
 #include "commands/matview.h"
 #include "commands/trigger.h"
 #include "executor/execdebug.h"
@@ -825,6 +826,7 @@ InitPlan(QueryDesc *queryDesc, int eflags)
            InitResultRelInfo(resultRelInfo,
                              resultRelation,
                              resultRelationIndex,
+                             true,
                              estate->es_instrument);
            resultRelInfo++;
        }
@@ -1019,6 +1021,7 @@ CheckValidResultRel(Relation resultRel, CmdType operation)
    switch (resultRel->rd_rel->relkind)
    {
        case RELKIND_RELATION:
+       case RELKIND_PARTITIONED_TABLE:
            /* OK */
            break;
        case RELKIND_SEQUENCE:
@@ -1152,6 +1155,7 @@ CheckValidRowMarkRel(Relation rel, RowMarkType markType)
    switch (rel->rd_rel->relkind)
    {
        case RELKIND_RELATION:
+       case RELKIND_PARTITIONED_TABLE:
            /* OK */
            break;
        case RELKIND_SEQUENCE:
@@ -1212,6 +1216,7 @@ void
 InitResultRelInfo(ResultRelInfo *resultRelInfo,
                  Relation resultRelationDesc,
                  Index resultRelationIndex,
+                 bool load_partition_check,
                  int instrument_options)
 {
    MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
@@ -1249,6 +1254,10 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
    resultRelInfo->ri_ConstraintExprs = NULL;
    resultRelInfo->ri_junkFilter = NULL;
    resultRelInfo->ri_projectReturning = NULL;
+   if (load_partition_check)
+       resultRelInfo->ri_PartitionCheck =
+                           RelationGetPartitionQual(resultRelationDesc,
+                                                    true);
 }
 
 /*
@@ -1311,6 +1320,7 @@ ExecGetTriggerResultRel(EState *estate, Oid relid)
    InitResultRelInfo(rInfo,
                      rel,
                      0,        /* dummy rangetable index */
+                     true,
                      estate->es_instrument);
    estate->es_trig_target_relations =
        lappend(estate->es_trig_target_relations, rInfo);
@@ -1691,6 +1701,46 @@ ExecRelCheck(ResultRelInfo *resultRelInfo,
    return NULL;
 }
 
+/*
+ * ExecPartitionCheck --- check that tuple meets the partition constraint.
+ *
+ * Evaluates resultRelInfo->ri_PartitionCheck against the tuple in 'slot' and
+ * returns the ExecQual() result.  The expression state is built on the first
+ * call and cached in resultRelInfo->ri_PartitionCheckExpr, so later calls
+ * reuse it.
+ *
+ * Note: This is called *iff* resultRelInfo is the main target table.
+ */
+static bool
+ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
+                  EState *estate)
+{
+   ExprContext *econtext;
+
+   /*
+    * If first time through, build expression state tree for the partition
+    * check expression.  Keep it in the per-query memory context so it will
+    * survive throughout the query.
+    */
+   if (resultRelInfo->ri_PartitionCheckExpr == NULL)
+   {
+       List *qual = resultRelInfo->ri_PartitionCheck;
+
+       resultRelInfo->ri_PartitionCheckExpr = (List *)
+                                   ExecPrepareExpr((Expr *) qual, estate);
+   }
+
+   /*
+    * We will use the EState's per-tuple context for evaluating constraint
+    * expressions (creating it if it's not already there).
+    */
+   econtext = GetPerTupleExprContext(estate);
+
+   /* Arrange for econtext's scan tuple to be the tuple under test */
+   econtext->ecxt_scantuple = slot;
+
+   /*
+    * As in case of the catalogued constraints, we treat a NULL result as
+    * success here, not a failure.
+    */
+   return ExecQual(resultRelInfo->ri_PartitionCheckExpr, econtext, true);
+}
+
 void
 ExecConstraints(ResultRelInfo *resultRelInfo,
                TupleTableSlot *slot, EState *estate)
@@ -1702,9 +1752,9 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
    Bitmapset  *insertedCols;
    Bitmapset  *updatedCols;
 
-   Assert(constr);
+   Assert(constr || resultRelInfo->ri_PartitionCheck);
 
-   if (constr->has_not_null)
+   if (constr && constr->has_not_null)
    {
        int         natts = tupdesc->natts;
        int         attrChk;
@@ -1735,7 +1785,7 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
        }
    }
 
-   if (constr->num_check > 0)
+   if (constr && constr->num_check > 0)
    {
        const char *failed;
 
@@ -1759,6 +1809,26 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
                     errtableconstraint(rel, failed)));
        }
    }
+
+   if (resultRelInfo->ri_PartitionCheck &&
+       !ExecPartitionCheck(resultRelInfo, slot, estate))
+   {
+       char       *val_desc;
+
+       insertedCols = GetInsertedColumns(resultRelInfo, estate);
+       updatedCols = GetUpdatedColumns(resultRelInfo, estate);
+       modifiedCols = bms_union(insertedCols, updatedCols);
+       val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
+                                                slot,
+                                                tupdesc,
+                                                modifiedCols,
+                                                64);
+       ereport(ERROR,
+               (errcode(ERRCODE_CHECK_VIOLATION),
+                errmsg("new row for relation \"%s\" violates partition constraint",
+                       RelationGetRelationName(rel)),
+         val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
+   }
 }
 
 /*
@@ -2926,3 +2996,52 @@ EvalPlanQualEnd(EPQState *epqstate)
    epqstate->planstate = NULL;
    epqstate->origslot = NULL;
 }
+
+/*
+ * ExecFindPartition -- Find a leaf partition in the partition tree rooted
+ * at parent, for the heap tuple contained in *slot
+ *
+ * estate must be non-NULL; we'll need it to compute any expressions in the
+ * partition key(s)
+ *
+ * If no leaf partition is found, this routine errors out with the appropriate
+ * error message, else it returns the leaf partition sequence number returned
+ * by get_partition_for_tuple() unchanged.
+ *
+ * On failure, the error message names the relation identified by the
+ * failed_at OID handed back by get_partition_for_tuple(), while the
+ * failing-row detail is built using the tuple descriptor of resultRelInfo's
+ * relation.
+ */
+int
+ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
+                 TupleTableSlot *slot, EState *estate)
+{
+   int     result;
+   Oid     failed_at;
+   ExprContext *econtext = GetPerTupleExprContext(estate);
+
+   econtext->ecxt_scantuple = slot;    /* make slot the per-tuple context's scan tuple */
+   result = get_partition_for_tuple(pd, slot, estate, &failed_at);
+   if (result < 0)     /* negative result: no leaf partition was found */
+   {
+       Relation    rel = resultRelInfo->ri_RelationDesc;
+       char       *val_desc;
+       Bitmapset  *insertedCols,
+                  *updatedCols,
+                  *modifiedCols;
+       TupleDesc   tupDesc = RelationGetDescr(rel);
+
+       insertedCols = GetInsertedColumns(resultRelInfo, estate);
+       updatedCols = GetUpdatedColumns(resultRelInfo, estate);
+       modifiedCols = bms_union(insertedCols, updatedCols);
+       val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
+                                                slot,
+                                                tupDesc,
+                                                modifiedCols,
+                                                64);
+       Assert(OidIsValid(failed_at));
+       ereport(ERROR,
+               (errcode(ERRCODE_CHECK_VIOLATION),
+                errmsg("no partition of relation \"%s\" found for row",
+                       get_rel_name(failed_at)),
+         val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));    /* detail omitted when val_desc is NULL */
+   }
+
+   return result;
+}
index efb0c5e8e5d2f03bf48c4ea4385610763332d120..c0b58d1841c296f30e7e443859584f8af72f9d92 100644 (file)
@@ -258,6 +258,7 @@ ExecInsert(ModifyTableState *mtstate,
 {
    HeapTuple   tuple;
    ResultRelInfo *resultRelInfo;
+   ResultRelInfo *saved_resultRelInfo = NULL;
    Relation    resultRelationDesc;
    Oid         newId;
    List       *recheckIndexes = NIL;
@@ -272,6 +273,56 @@ ExecInsert(ModifyTableState *mtstate,
     * get information on the (current) result relation
     */
    resultRelInfo = estate->es_result_relation_info;
+
+   /* Determine the partition to heap_insert the tuple into */
+   if (mtstate->mt_partition_dispatch_info)
+   {
+       int     leaf_part_index;
+       TupleConversionMap *map;
+
+       /*
+        * Away we go ... If we end up not finding a partition after all,
+        * ExecFindPartition() does not return and errors out instead.
+        * Otherwise, the returned value is to be used as an index into
+        * arrays mt_partitions[] and mt_partition_tupconv_maps[] that
+        * will get us the ResultRelInfo and TupleConversionMap for the
+        * partition, respectively.
+        */
+       leaf_part_index = ExecFindPartition(resultRelInfo,
+                                       mtstate->mt_partition_dispatch_info,
+                                           slot,
+                                           estate);
+       Assert(leaf_part_index >= 0 &&
+              leaf_part_index < mtstate->mt_num_partitions);
+
+       /*
+        * Save the old ResultRelInfo and switch to the one corresponding to
+        * the selected partition.
+        */
+       saved_resultRelInfo = resultRelInfo;
+       resultRelInfo = mtstate->mt_partitions + leaf_part_index;
+
+       /* We do not yet have a way to insert into a foreign partition */
+       if (resultRelInfo->ri_FdwRoutine)
+           ereport(ERROR,
+                   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                    errmsg("cannot route inserted tuples to a foreign table")));
+
+       /* For ExecInsertIndexTuples() to work on the partition's indexes */
+       estate->es_result_relation_info = resultRelInfo;
+
+       /*
+        * We might need to convert from the parent rowtype to the partition
+        * rowtype.
+        */
+       map = mtstate->mt_partition_tupconv_maps[leaf_part_index];
+       if (map)
+       {
+           tuple = do_convert_tuple(tuple, map);
+           ExecStoreTuple(tuple, slot, InvalidBuffer, true);
+       }
+   }
+
    resultRelationDesc = resultRelInfo->ri_RelationDesc;
 
    /*
@@ -369,7 +420,7 @@ ExecInsert(ModifyTableState *mtstate,
        /*
         * Check the constraints of the tuple
         */
-       if (resultRelationDesc->rd_att->constr)
+       if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck)
            ExecConstraints(resultRelInfo, slot, estate);
 
        if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
@@ -511,6 +562,12 @@ ExecInsert(ModifyTableState *mtstate,
 
    list_free(recheckIndexes);
 
+   if (saved_resultRelInfo)
+   {
+       resultRelInfo = saved_resultRelInfo;
+       estate->es_result_relation_info = resultRelInfo;
+   }
+
    /*
     * Check any WITH CHECK OPTION constraints from parent views.  We are
     * required to do this after testing all constraints and uniqueness
@@ -922,7 +979,7 @@ lreplace:;
        /*
         * Check the constraints of the tuple
         */
-       if (resultRelationDesc->rd_att->constr)
+       if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck)
            ExecConstraints(resultRelInfo, slot, estate);
 
        /*
@@ -1565,6 +1622,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
    Plan       *subplan;
    ListCell   *l;
    int         i;
+   Relation    rel;
 
    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -1655,6 +1713,75 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 
    estate->es_result_relation_info = saved_resultRelInfo;
 
+   /* Build state for INSERT tuple routing */
+   rel = mtstate->resultRelInfo->ri_RelationDesc;
+   if (operation == CMD_INSERT &&
+       rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+   {
+       PartitionDispatch  *pd;
+       int                 i,
+                           j,
+                           num_parted,
+                           num_leaf_parts;
+       List               *leaf_parts;
+       ListCell           *cell;
+       ResultRelInfo      *leaf_part_rri;
+
+       /* Form the partition node tree and lock partitions */
+       pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
+                                             &num_parted, &leaf_parts);
+       mtstate->mt_partition_dispatch_info = pd;
+       mtstate->mt_num_dispatch = num_parted;
+       num_leaf_parts = list_length(leaf_parts);
+       mtstate->mt_num_partitions = num_leaf_parts;
+       mtstate->mt_partitions = (ResultRelInfo *)
+                       palloc0(num_leaf_parts * sizeof(ResultRelInfo));
+       mtstate->mt_partition_tupconv_maps = (TupleConversionMap **)
+                   palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+
+       leaf_part_rri = mtstate->mt_partitions;
+       i = j = 0;
+       foreach(cell, leaf_parts)
+       {
+           Oid         partrelid = lfirst_oid(cell);
+           Relation    partrel;
+
+           /*
+            * We locked all the partitions above including the leaf
+            * partitions.  Note that each of the relations in
+            * mtstate->mt_partitions will be closed by ExecEndModifyTable().
+            */
+           partrel = heap_open(partrelid, NoLock);
+
+           /*
+            * Verify result relation is a valid target for the current
+            * operation
+            */
+           CheckValidResultRel(partrel, CMD_INSERT);
+
+           InitResultRelInfo(leaf_part_rri,
+                             partrel,
+                             1,        /* dummy */
+                             false,    /* no partition constraint checks */
+                             eflags);
+
+           /* Open partition indices (note: ON CONFLICT unsupported) */
+           if (partrel->rd_rel->relhasindex && operation != CMD_DELETE &&
+               leaf_part_rri->ri_IndexRelationDescs == NULL)
+               ExecOpenIndices(leaf_part_rri, false);
+
+           if (!equalTupleDescs(RelationGetDescr(rel),
+                                RelationGetDescr(partrel)))
+               mtstate->mt_partition_tupconv_maps[i] =
+                           convert_tuples_by_name(RelationGetDescr(rel),
+                                                  RelationGetDescr(partrel),
+                                 gettext_noop("could not convert row type"));
+
+           leaf_part_rri++;
+           i++;
+       }
+   }
+
    /*
     * Initialize any WITH CHECK OPTION constraints if needed.
     */
@@ -1886,7 +2013,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 
                    relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
                    if (relkind == RELKIND_RELATION ||
-                       relkind == RELKIND_MATVIEW)
+                       relkind == RELKIND_MATVIEW ||
+                       relkind == RELKIND_PARTITIONED_TABLE)
                    {
                        j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
                        if (!AttributeNumberIsValid(j->jf_junkAttNo))
@@ -1971,6 +2099,26 @@ ExecEndModifyTable(ModifyTableState *node)
                                                           resultRelInfo);
    }
 
+   /*
+    * Close all the partitioned tables, leaf partitions, and their indices.
+    *
+    * Remember node->mt_partition_dispatch_info[0] corresponds to the root
+    * partitioned table, which we must not try to close, because it is the
+    * main target table of the query that will be closed by ExecEndPlan().
+    */
+   for (i = 1; i < node->mt_num_dispatch; i++)
+   {
+       PartitionDispatch pd = node->mt_partition_dispatch_info[i];
+
+       heap_close(pd->reldesc, NoLock);
+   }
+   for (i = 0; i < node->mt_num_partitions; i++)
+   {
+       ResultRelInfo *resultRelInfo = node->mt_partitions + i;
+
+       ExecCloseIndices(resultRelInfo);
+       heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+   }
+
    /*
     * Free the exprcontext
     */
index dd66adb0b24b9ce6b7bed2f7b25c5058b6d53568..e30c57e86be341f32c02dab1fbeddfef2fc990f7 100644 (file)
@@ -3030,6 +3030,8 @@ CopyCreateStmtFields(const CreateStmt *from, CreateStmt *newnode)
    COPY_NODE_FIELD(relation);
    COPY_NODE_FIELD(tableElts);
    COPY_NODE_FIELD(inhRelations);
+   COPY_NODE_FIELD(partspec);
+   COPY_NODE_FIELD(partbound);
    COPY_NODE_FIELD(ofTypename);
    COPY_NODE_FIELD(constraints);
    COPY_NODE_FIELD(options);
@@ -4188,6 +4190,70 @@ _copyAlterPolicyStmt(const AlterPolicyStmt *from)
    return newnode;
 }
 
+static PartitionSpec *
+_copyPartitionSpec(const PartitionSpec *from)  /* copy routine for PartitionSpec nodes */
+{
+
+   PartitionSpec *newnode = makeNode(PartitionSpec);
+
+   COPY_STRING_FIELD(strategy);    /* a string here; PartitionBoundSpec.strategy is copied as a scalar */
+   COPY_NODE_FIELD(partParams);
+   COPY_LOCATION_FIELD(location);
+
+   return newnode;
+}
+
+static PartitionElem *
+_copyPartitionElem(const PartitionElem *from)  /* copy routine for PartitionElem nodes */
+{
+   PartitionElem *newnode = makeNode(PartitionElem);
+
+   COPY_STRING_FIELD(name);
+   COPY_NODE_FIELD(expr);
+   COPY_NODE_FIELD(collation);     /* copied as a node, like opclass below */
+   COPY_NODE_FIELD(opclass);
+   COPY_LOCATION_FIELD(location);
+
+   return newnode;
+}
+
+static PartitionBoundSpec *
+_copyPartitionBoundSpec(const PartitionBoundSpec *from)    /* copy routine for PartitionBoundSpec nodes */
+{
+   PartitionBoundSpec *newnode = makeNode(PartitionBoundSpec);
+
+   COPY_SCALAR_FIELD(strategy);    /* scalar; written with WRITE_CHAR_FIELD in _outPartitionBoundSpec() */
+   COPY_NODE_FIELD(listdatums);
+   COPY_NODE_FIELD(lowerdatums);
+   COPY_NODE_FIELD(upperdatums);
+   COPY_LOCATION_FIELD(location);  /* copied here although _outPartitionBoundSpec() does not write it */
+
+   return newnode;
+}
+
+static PartitionRangeDatum *
+_copyPartitionRangeDatum(const PartitionRangeDatum *from)  /* copy routine for PartitionRangeDatum nodes */
+{
+   PartitionRangeDatum *newnode = makeNode(PartitionRangeDatum);
+
+   COPY_SCALAR_FIELD(infinite);    /* scalar; written with WRITE_BOOL_FIELD in _outPartitionRangeDatum() */
+   COPY_NODE_FIELD(value);
+   COPY_LOCATION_FIELD(location);  /* copied here although _outPartitionRangeDatum() does not write it */
+
+   return newnode;
+}
+
+static PartitionCmd *
+_copyPartitionCmd(const PartitionCmd *from)    /* copy routine for PartitionCmd nodes */
+{
+   PartitionCmd *newnode = makeNode(PartitionCmd);
+
+   COPY_NODE_FIELD(name);      /* a node field here, unlike the string 'name' of PartitionElem */
+   COPY_NODE_FIELD(bound);
+
+   return newnode;
+}
+
 /* ****************************************************************
  *                 pg_list.h copy functions
  * ****************************************************************
@@ -5105,6 +5171,21 @@ copyObject(const void *from)
        case T_TriggerTransition:
            retval = _copyTriggerTransition(from);
            break;
+       case T_PartitionSpec:
+           retval = _copyPartitionSpec(from);
+           break;
+       case T_PartitionElem:
+           retval = _copyPartitionElem(from);
+           break;
+       case T_PartitionBoundSpec:
+           retval = _copyPartitionBoundSpec(from);
+           break;
+       case T_PartitionRangeDatum:
+           retval = _copyPartitionRangeDatum(from);
+           break;
+       case T_PartitionCmd:
+           retval = _copyPartitionCmd(from);
+           break;
 
            /*
             * MISCELLANEOUS NODES
index cad3aebecd53940c5529da45eff4cec656714071..b7a109cfb046ac11f9fc92f28a305d70b55cd65e 100644 (file)
@@ -1168,6 +1168,8 @@ _equalCreateStmt(const CreateStmt *a, const CreateStmt *b)
    COMPARE_NODE_FIELD(relation);
    COMPARE_NODE_FIELD(tableElts);
    COMPARE_NODE_FIELD(inhRelations);
+   COMPARE_NODE_FIELD(partspec);
+   COMPARE_NODE_FIELD(partbound);
    COMPARE_NODE_FIELD(ofTypename);
    COMPARE_NODE_FIELD(constraints);
    COMPARE_NODE_FIELD(options);
@@ -2646,6 +2648,59 @@ _equalTriggerTransition(const TriggerTransition *a, const TriggerTransition *b)
    return true;
 }
 
+static bool
+_equalPartitionSpec(const PartitionSpec *a, const PartitionSpec *b)    /* equality routine for PartitionSpec nodes */
+{
+   COMPARE_STRING_FIELD(strategy);     /* same fields, same order as _copyPartitionSpec() */
+   COMPARE_NODE_FIELD(partParams);
+   COMPARE_LOCATION_FIELD(location);
+
+   return true;    /* presumably reached only if no COMPARE_* macro returned early -- macros are defined outside this hunk */
+}
+
+static bool
+_equalPartitionElem(const PartitionElem *a, const PartitionElem *b)    /* equality routine for PartitionElem nodes */
+{
+   COMPARE_STRING_FIELD(name);         /* same fields, same order as _copyPartitionElem() */
+   COMPARE_NODE_FIELD(expr);
+   COMPARE_NODE_FIELD(collation);
+   COMPARE_NODE_FIELD(opclass);
+   COMPARE_LOCATION_FIELD(location);
+
+   return true;    /* presumably reached only if no COMPARE_* macro returned early -- macros are defined outside this hunk */
+}
+
+static bool
+_equalPartitionBoundSpec(const PartitionBoundSpec *a, const PartitionBoundSpec *b) /* equality routine for PartitionBoundSpec nodes */
+{
+   COMPARE_SCALAR_FIELD(strategy);     /* same fields, same order as _copyPartitionBoundSpec() */
+   COMPARE_NODE_FIELD(listdatums);
+   COMPARE_NODE_FIELD(lowerdatums);
+   COMPARE_NODE_FIELD(upperdatums);
+   COMPARE_LOCATION_FIELD(location);
+
+   return true;    /* presumably reached only if no COMPARE_* macro returned early -- macros are defined outside this hunk */
+}
+
+static bool
+_equalPartitionRangeDatum(const PartitionRangeDatum *a, const PartitionRangeDatum *b)  /* equality routine for PartitionRangeDatum nodes */
+{
+   COMPARE_SCALAR_FIELD(infinite);     /* same fields, same order as _copyPartitionRangeDatum() */
+   COMPARE_NODE_FIELD(value);
+   COMPARE_LOCATION_FIELD(location);
+
+   return true;    /* presumably reached only if no COMPARE_* macro returned early -- macros are defined outside this hunk */
+}
+
+static bool
+_equalPartitionCmd(const PartitionCmd *a, const PartitionCmd *b)   /* equality routine for PartitionCmd nodes */
+{
+   COMPARE_NODE_FIELD(name);       /* same fields, same order as _copyPartitionCmd() */
+   COMPARE_NODE_FIELD(bound);
+
+   return true;    /* presumably reached only if no COMPARE_* macro returned early -- macros are defined outside this hunk */
+}
+
 /*
  * Stuff from pg_list.h
  */
@@ -3402,6 +3457,21 @@ equal(const void *a, const void *b)
        case T_TriggerTransition:
            retval = _equalTriggerTransition(a, b);
            break;
+       case T_PartitionSpec:
+           retval = _equalPartitionSpec(a, b);
+           break;
+       case T_PartitionElem:
+           retval = _equalPartitionElem(a, b);
+           break;
+       case T_PartitionBoundSpec:
+           retval = _equalPartitionBoundSpec(a, b);
+           break;
+       case T_PartitionRangeDatum:
+           retval = _equalPartitionRangeDatum(a, b);
+           break;
+       case T_PartitionCmd:
+           retval = _equalPartitionCmd(a, b);
+           break;
 
        default:
            elog(ERROR, "unrecognized node type: %d",
index 399744193c0b90998bbbdaf92e5e28c600cb0e3c..973fb152c19ea996d865b227cab93b06333295dc 100644 (file)
@@ -1552,6 +1552,12 @@ exprLocation(const Node *expr)
            /* just use nested expr's location */
            loc = exprLocation((Node *) ((const InferenceElem *) expr)->expr);
            break;
+       case T_PartitionBoundSpec:
+           loc = ((const PartitionBoundSpec *) expr)->location;
+           break;
+       case T_PartitionRangeDatum:
+           loc = ((const PartitionRangeDatum *) expr)->location;
+           break;
        default:
            /* for any other node type it's just unknown... */
            loc = -1;
index 748b68792927b1c8acc63dd69208ca45bf441c19..0d858f592067a2ea471149ed21e05b1b262d5075 100644 (file)
@@ -2392,6 +2392,8 @@ _outCreateStmtInfo(StringInfo str, const CreateStmt *node)
    WRITE_NODE_FIELD(relation);
    WRITE_NODE_FIELD(tableElts);
    WRITE_NODE_FIELD(inhRelations);
+   WRITE_NODE_FIELD(partspec);
+   WRITE_NODE_FIELD(partbound);
    WRITE_NODE_FIELD(ofTypename);
    WRITE_NODE_FIELD(constraints);
    WRITE_NODE_FIELD(options);
@@ -3277,6 +3279,47 @@ _outForeignKeyCacheInfo(StringInfo str, const ForeignKeyCacheInfo *node)
        appendStringInfo(str, " %u", node->conpfeqop[i]);
 }
 
+static void
+_outPartitionSpec(StringInfo str, const PartitionSpec *node)   /* output routine for PartitionSpec nodes */
+{
+   WRITE_NODE_TYPE("PARTITIONBY");     /* NOTE(review): no matching reader for this label is visible -- confirm the node is output-only */
+
+   WRITE_STRING_FIELD(strategy);
+   WRITE_NODE_FIELD(partParams);
+   WRITE_LOCATION_FIELD(location);
+}
+
+static void
+_outPartitionElem(StringInfo str, const PartitionElem *node)   /* output routine for PartitionElem nodes */
+{
+   WRITE_NODE_TYPE("PARTITIONELEM");   /* NOTE(review): no matching reader for this label is visible -- confirm the node is output-only */
+
+   WRITE_STRING_FIELD(name);
+   WRITE_NODE_FIELD(expr);
+   WRITE_NODE_FIELD(collation);
+   WRITE_NODE_FIELD(opclass);
+   WRITE_LOCATION_FIELD(location);
+}
+
+static void
+_outPartitionBoundSpec(StringInfo str, const PartitionBoundSpec *node) /* output routine for PartitionBoundSpec nodes */
+{
+   WRITE_NODE_TYPE("PARTITIONBOUND");  /* same label that parseNodeString() matches for _readPartitionBoundSpec() */
+
+   WRITE_CHAR_FIELD(strategy);
+   WRITE_NODE_FIELD(listdatums);
+   WRITE_NODE_FIELD(lowerdatums);
+   WRITE_NODE_FIELD(upperdatums);      /* 'location' is not written; _readPartitionBoundSpec() reads exactly these four fields, so change both together */
+}
+
+static void
+_outPartitionRangeDatum(StringInfo str, const PartitionRangeDatum *node)   /* output routine for PartitionRangeDatum nodes */
+{
+   WRITE_NODE_TYPE("PARTRANGEDATUM");  /* same label that parseNodeString() matches for _readPartitionRangeDatum() */
+
+   WRITE_BOOL_FIELD(infinite);
+   WRITE_NODE_FIELD(value);            /* 'location' is not written; _readPartitionRangeDatum() reads exactly these two fields, so change both together */
+}
 
 /*
  * outNode -
@@ -3865,6 +3908,18 @@ outNode(StringInfo str, const void *obj)
            case T_TriggerTransition:
                _outTriggerTransition(str, obj);
                break;
+           case T_PartitionSpec:
+               _outPartitionSpec(str, obj);
+               break;
+           case T_PartitionElem:
+               _outPartitionElem(str, obj);
+               break;
+           case T_PartitionBoundSpec:
+               _outPartitionBoundSpec(str, obj);
+               break;
+           case T_PartitionRangeDatum:
+               _outPartitionRangeDatum(str, obj);
+               break;
 
            default:
 
index 917e6c8a65efe96aa84e51780f702376bb70473c..c587d4e1d7257ab1fc5982f80c6dfbbe15413bff 100644 (file)
@@ -2265,6 +2265,36 @@ _readExtensibleNode(void)
    READ_DONE();
 }
 
+/*
+ * _readPartitionBoundSpec - called by parseNodeString for the "PARTITIONBOUND" label
+ */
+static PartitionBoundSpec *
+_readPartitionBoundSpec(void)
+{
+   READ_LOCALS(PartitionBoundSpec);
+
+   READ_CHAR_FIELD(strategy);          /* fields are read in the order _outPartitionBoundSpec writes them */
+   READ_NODE_FIELD(listdatums);
+   READ_NODE_FIELD(lowerdatums);
+   READ_NODE_FIELD(upperdatums);       /* NOTE(review): location is neither written nor read; change both sides together */
+
+   READ_DONE();
+}
+
+/*
+ * _readPartitionRangeDatum - called by parseNodeString for the "PARTRANGEDATUM" label
+ */
+static PartitionRangeDatum *
+_readPartitionRangeDatum(void)
+{
+   READ_LOCALS(PartitionRangeDatum);
+
+   READ_BOOL_FIELD(infinite);          /* fields are read in the order _outPartitionRangeDatum writes them */
+   READ_NODE_FIELD(value);             /* NOTE(review): location is neither written nor read; change both sides together */
+
+   READ_DONE();
+}
+
 /*
  * parseNodeString
  *
@@ -2497,6 +2527,10 @@ parseNodeString(void)
        return_value = _readAlternativeSubPlan();
    else if (MATCH("EXTENSIBLENODE", 14))
        return_value = _readExtensibleNode();
+   else if (MATCH("PARTITIONBOUND", 14))
+       return_value = _readPartitionBoundSpec();
+   else if (MATCH("PARTRANGEDATUM", 14))
+       return_value = _readPartitionRangeDatum();
    else
    {
        elog(ERROR, "badly formatted node string \"%.32s\"...", token);
index bb16c59028d304f1111e84e812a6c7776e9b582d..72272d9bb7bce7804397daf02cf917da4a876740 100644 (file)
@@ -27,6 +27,7 @@
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/heap.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
@@ -1140,6 +1141,7 @@ get_relation_constraints(PlannerInfo *root,
    Index       varno = rel->relid;
    Relation    relation;
    TupleConstr *constr;
+   List        *pcqual;
 
    /*
     * We assume the relation has already been safely locked.
@@ -1225,6 +1227,24 @@ get_relation_constraints(PlannerInfo *root,
        }
    }
 
+   /* Append partition predicates, if any */
+   pcqual = RelationGetPartitionQual(relation, true);
+   if (pcqual)
+   {
+       /*
+        * Run each expression through const-simplification and
+        * canonicalization similar to check constraints.
+        */
+       pcqual = (List *) eval_const_expressions(root, (Node *) pcqual);
+       pcqual = (List *) canonicalize_qual((Expr *) pcqual);
+
+       /* Fix Vars to have the desired varno */
+       if (varno != 1)
+           ChangeVarNodes((Node *) pcqual, 1, varno, 0);
+
+       result = list_concat(result, pcqual);
+   }
+
    heap_close(relation, NoLock);
 
    return result;
index 1a541788eb1291832c32cc2b977280c309071d60..73643461672d9b431e5cdbbba68ad2efd979bfa5 100644 (file)
@@ -806,8 +806,16 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
 
    /* Process ON CONFLICT, if any. */
    if (stmt->onConflictClause)
+   {
+       /* Bail out if target relation is a partitioned table */
+       if (pstate->p_target_rangetblentry->relkind == RELKIND_PARTITIONED_TABLE)
+           ereport(ERROR,
+                   (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                    errmsg("ON CONFLICT clause is not supported with partitioned tables")));
+
        qry->onConflict = transformOnConflictClause(pstate,
                                                    stmt->onConflictClause);
+   }
 
    /*
     * If we have a RETURNING clause, we need to add the target relation to
index 414348b95b4b1be78820e3ae48c3bc591482bb3e..2ed7b5259d0597ffab59319b2390cc8e9135f6ac 100644 (file)
@@ -229,6 +229,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
    struct ImportQual   *importqual;
    InsertStmt          *istmt;
    VariableSetStmt     *vsetstmt;
+   PartitionElem       *partelem;
+   PartitionSpec       *partspec;
+   PartitionRangeDatum *partrange_datum;
 }
 
 %type <node>   stmt schema_stmt
@@ -276,7 +279,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <ival>   add_drop opt_asc_desc opt_nulls_order
 
 %type <node>   alter_table_cmd alter_type_cmd opt_collate_clause
-      replica_identity
+      replica_identity partition_cmd
 %type <list>   alter_table_cmds alter_type_cmds
 
 %type <dbehavior>  opt_drop_behavior
@@ -545,6 +548,17 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
                opt_frame_clause frame_extent frame_bound
 %type <str>        opt_existing_window_name
 %type <boolean> opt_if_not_exists
+%type <partspec>   PartitionSpec OptPartitionSpec
+%type <str>            part_strategy
+%type <partelem>   part_elem
+%type <list>       part_params
+%type <list>       OptPartitionElementList PartitionElementList
+%type <node>       PartitionElement
+%type <node>       ForValues
+%type <node>       partbound_datum
+%type <list>       partbound_datum_list
+%type <partrange_datum>    PartitionRangeDatum
+%type <list>       range_datum_list
 
 /*
  * Non-keyword token types.  These are hard-wired into the "flex" lexer.
@@ -570,7 +584,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 /* ordinary key words in alphabetical order */
 %token <keyword> ABORT_P ABSOLUTE_P ACCESS ACTION ADD_P ADMIN AFTER
    AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC
-   ASSERTION ASSIGNMENT ASYMMETRIC AT ATTRIBUTE AUTHORIZATION
+   ASSERTION ASSIGNMENT ASYMMETRIC AT ATTACH ATTRIBUTE AUTHORIZATION
 
    BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
    BOOLEAN_P BOTH BY
@@ -586,7 +600,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
    DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
    DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DESC
-   DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP
+   DETACH DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P
+   DOUBLE_P DROP
 
    EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EVENT EXCEPT
    EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN
@@ -1787,6 +1802,24 @@ AlterTableStmt:
                    n->missing_ok = true;
                    $$ = (Node *)n;
                }
+       |   ALTER TABLE relation_expr partition_cmd
+               {
+                   AlterTableStmt *n = makeNode(AlterTableStmt);
+                   n->relation = $3;
+                   n->cmds = list_make1($4);
+                   n->relkind = OBJECT_TABLE;
+                   n->missing_ok = false;
+                   $$ = (Node *)n;
+               }
+       |   ALTER TABLE IF_P EXISTS relation_expr partition_cmd
+               {
+                   AlterTableStmt *n = makeNode(AlterTableStmt);
+                   n->relation = $5;
+                   n->cmds = list_make1($6);
+                   n->relkind = OBJECT_TABLE;
+                   n->missing_ok = true;
+                   $$ = (Node *)n;
+               }
        |   ALTER TABLE ALL IN_P TABLESPACE name SET TABLESPACE name opt_nowait
                {
                    AlterTableMoveAllStmt *n =
@@ -1932,6 +1965,34 @@ alter_table_cmds:
            | alter_table_cmds ',' alter_table_cmd  { $$ = lappend($1, $3); }
        ;
 
+partition_cmd:                      /* ATTACH/DETACH PARTITION; AlterTableStmt wraps the result as its only command */
+           /* ALTER TABLE <name> ATTACH PARTITION <table_name> FOR VALUES */
+           ATTACH PARTITION qualified_name ForValues
+               {
+                   AlterTableCmd *n = makeNode(AlterTableCmd);
+                   PartitionCmd *cmd = makeNode(PartitionCmd);
+
+                   n->subtype = AT_AttachPartition;
+                   cmd->name = $3;             /* the table being attached */
+                   cmd->bound = (Node *) $4;   /* PartitionBoundSpec built by ForValues */
+                   n->def = (Node *) cmd;      /* the PartitionCmd travels in the command's def field */
+
+                   $$ = (Node *) n;
+               }
+           /* ALTER TABLE <name> DETACH PARTITION <partition_name> */
+           | DETACH PARTITION qualified_name
+               {
+                   AlterTableCmd *n = makeNode(AlterTableCmd);
+                   PartitionCmd *cmd = makeNode(PartitionCmd);
+
+                   n->subtype = AT_DetachPartition;
+                   cmd->name = $3;             /* the partition being detached; cmd->bound is not set here */
+                   n->def = (Node *) cmd;
+
+                   $$ = (Node *) n;
+               }
+       ;
+
 alter_table_cmd:
            /* ALTER TABLE <name> ADD <coldef> */
            ADD_P columnDef
@@ -2467,6 +2528,73 @@ reloption_elem:
                }
        ;
 
+ForValues:                          /* FOR VALUES clause; yields a PartitionBoundSpec cast to Node */
+           /* a LIST partition: FOR VALUES IN (datum [, ...]) */
+           FOR VALUES IN_P '(' partbound_datum_list ')'
+               {
+                   PartitionBoundSpec *n = makeNode(PartitionBoundSpec);
+
+                   n->strategy = PARTITION_STRATEGY_LIST;
+                   n->listdatums = $5;
+                   n->location = @3;           /* position of the IN keyword */
+
+                   $$ = (Node *) n;
+               }
+
+           /* a RANGE partition: FOR VALUES FROM (datum [, ...]) TO (datum [, ...]) */
+           | FOR VALUES FROM '(' range_datum_list ')' TO '(' range_datum_list ')'
+               {
+                   PartitionBoundSpec *n = makeNode(PartitionBoundSpec);
+
+                   n->strategy = PARTITION_STRATEGY_RANGE;
+                   n->lowerdatums = $5;        /* the FROM (...) list */
+                   n->upperdatums = $9;        /* the TO (...) list */
+                   n->location = @3;           /* position of the FROM keyword */
+
+                   $$ = (Node *) n;
+               }
+       ;
+
+partbound_datum:                    /* one bound value; only the three constant forms below are accepted */
+           Sconst          { $$ = makeStringConst($1, @1); }
+           | NumericOnly   { $$ = makeAConst($1, @1); }
+           | NULL_P        { $$ = makeNullAConst(@1); }
+       ;
+
+partbound_datum_list:               /* comma-separated, non-empty list of partbound_datum (used by FOR VALUES IN) */
+           partbound_datum                     { $$ = list_make1($1); }
+           | partbound_datum_list ',' partbound_datum
+                                               { $$ = lappend($1, $3); }
+       ;
+
+range_datum_list:                   /* comma-separated, non-empty list of PartitionRangeDatum (used by FROM/TO) */
+           PartitionRangeDatum                 { $$ = list_make1($1); }
+           | range_datum_list ',' PartitionRangeDatum
+                                               { $$ = lappend($1, $3); }
+       ;
+
+PartitionRangeDatum:                /* one element of a range bound: UNBOUNDED or a constant */
+           UNBOUNDED
+               {
+                   PartitionRangeDatum *n = makeNode(PartitionRangeDatum);
+
+                   n->infinite = true;         /* no concrete value on this side of the range */
+                   n->value = NULL;
+                   n->location = @1;
+
+                   $$ = n;
+               }
+           | partbound_datum
+               {
+                   PartitionRangeDatum *n = makeNode(PartitionRangeDatum);
+
+                   n->infinite = false;
+                   n->value = $1;              /* the constant node built by partbound_datum */
+                   n->location = @1;
+
+                   $$ = n;
+               }
+       ;
 
 /*****************************************************************************
  *
@@ -2812,69 +2940,113 @@ copy_generic_opt_arg_list_item:
  *****************************************************************************/
 
 CreateStmt:    CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
-           OptInherit OptWith OnCommitOption OptTableSpace
+           OptInherit OptPartitionSpec OptWith OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $4->relpersistence = $2;
                    n->relation = $4;
                    n->tableElts = $6;
                    n->inhRelations = $8;
+                   n->partspec = $9;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $9;
-                   n->oncommit = $10;
-                   n->tablespacename = $11;
+                   n->options = $10;
+                   n->oncommit = $11;
+                   n->tablespacename = $12;
                    n->if_not_exists = false;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name '('
-           OptTableElementList ')' OptInherit OptWith OnCommitOption
-           OptTableSpace
+           OptTableElementList ')' OptInherit OptPartitionSpec OptWith
+           OnCommitOption OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $7->relpersistence = $2;
                    n->relation = $7;
                    n->tableElts = $9;
                    n->inhRelations = $11;
+                   n->partspec = $12;
                    n->ofTypename = NULL;
                    n->constraints = NIL;
-                   n->options = $12;
-                   n->oncommit = $13;
-                   n->tablespacename = $14;
+                   n->options = $13;
+                   n->oncommit = $14;
+                   n->tablespacename = $15;
                    n->if_not_exists = true;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE qualified_name OF any_name
-           OptTypedTableElementList OptWith OnCommitOption OptTableSpace
+           OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption
+           OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $4->relpersistence = $2;
                    n->relation = $4;
                    n->tableElts = $7;
                    n->inhRelations = NIL;
+                   n->partspec = $8;
                    n->ofTypename = makeTypeNameFromNameList($6);
                    n->ofTypename->location = @6;
                    n->constraints = NIL;
-                   n->options = $8;
-                   n->oncommit = $9;
-                   n->tablespacename = $10;
+                   n->options = $9;
+                   n->oncommit = $10;
+                   n->tablespacename = $11;
                    n->if_not_exists = false;
                    $$ = (Node *)n;
                }
        | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name OF any_name
-           OptTypedTableElementList OptWith OnCommitOption OptTableSpace
+           OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption
+           OptTableSpace
                {
                    CreateStmt *n = makeNode(CreateStmt);
                    $7->relpersistence = $2;
                    n->relation = $7;
                    n->tableElts = $10;
                    n->inhRelations = NIL;
+                   n->partspec = $11;
                    n->ofTypename = makeTypeNameFromNameList($9);
                    n->ofTypename->location = @9;
                    n->constraints = NIL;
+                   n->options = $12;
+                   n->oncommit = $13;
+                   n->tablespacename = $14;
+                   n->if_not_exists = true;
+                   $$ = (Node *)n;
+               }
+       | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name
+           OptPartitionElementList ForValues OptPartitionSpec OptWith
+           OnCommitOption OptTableSpace
+               {
+                   CreateStmt *n = makeNode(CreateStmt);
+                   $4->relpersistence = $2;
+                   n->relation = $4;
+                   n->tableElts = $8;
+                   n->inhRelations = list_make1($7);
+                   n->partbound = (Node *) $9;
+                   n->partspec = $10;
+                   n->ofTypename = NULL;
+                   n->constraints = NIL;
                    n->options = $11;
                    n->oncommit = $12;
                    n->tablespacename = $13;
+                   n->if_not_exists = false;
+                   $$ = (Node *)n;
+               }
+       | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name PARTITION OF
+           qualified_name OptPartitionElementList ForValues OptPartitionSpec
+           OptWith OnCommitOption OptTableSpace
+               {
+                   CreateStmt *n = makeNode(CreateStmt);
+                   $7->relpersistence = $2;
+                   n->relation = $7;
+                   n->tableElts = $11;
+                   n->inhRelations = list_make1($10);
+                   n->partbound = (Node *) $12;
+                   n->partspec = $13;
+                   n->ofTypename = NULL;
+                   n->constraints = NIL;
+                   n->options = $14;
+                   n->oncommit = $15;
+                   n->tablespacename = $16;
                    n->if_not_exists = true;
                    $$ = (Node *)n;
                }
@@ -2923,6 +3095,11 @@ OptTypedTableElementList:
            | /*EMPTY*/                         { $$ = NIL; }
        ;
 
+OptPartitionElementList:            /* optional parenthesized element list following PARTITION OF <parent> */
+           '(' PartitionElementList ')'        { $$ = $2; }
+           | /*EMPTY*/                         { $$ = NIL; }
+       ;
+
 TableElementList:
            TableElement
                {
@@ -2945,6 +3122,17 @@ TypedTableElementList:
                }
        ;
 
+PartitionElementList:               /* comma-separated, non-empty list of PartitionElement */
+           PartitionElement
+               {
+                   $$ = list_make1($1);
+               }
+           | PartitionElementList ',' PartitionElement
+               {
+                   $$ = lappend($1, $3);
+               }
+       ;
+
 TableElement:
            columnDef                           { $$ = $1; }
            | TableLikeClause                   { $$ = $1; }
@@ -2956,6 +3144,28 @@ TypedTableElement:
            | TableConstraint                   { $$ = $1; }
        ;
 
+PartitionElement:                   /* a table constraint, or a column name followed by column qualifiers */
+       TableConstraint                 { $$ = $1; }
+       |   ColId ColQualList
+           {
+               ColumnDef *n = makeNode(ColumnDef);
+               n->colname = $1;
+               n->typeName = NULL;         /* this syntax carries no type; presumably taken from the parent later -- confirm */
+               n->inhcount = 0;
+               n->is_local = true;
+               n->is_not_null = false;
+               n->is_from_type = false;
+               n->storage = 0;
+               n->raw_default = NULL;
+               n->cooked_default = NULL;
+               n->collOid = InvalidOid;
+               SplitColQualList($2, &n->constraints, &n->collClause,
+                                yyscanner);    /* fills in n->constraints and n->collClause from the qualifier list */
+               n->location = @1;
+               $$ = (Node *) n;
+           }
+       ;
+
 columnDef: ColId Typename create_generic_options ColQualList
                {
                    ColumnDef *n = makeNode(ColumnDef);
@@ -3419,6 +3629,65 @@ OptInherit: INHERITS '(' qualified_name_list ')' { $$ = $3; }
            | /*EMPTY*/                             { $$ = NIL; }
        ;
 
+/* Optional partition key specification; NULL when no PARTITION BY clause is given */
+OptPartitionSpec: PartitionSpec    { $$ = $1; }
+           | /*EMPTY*/         { $$ = NULL; }
+       ;
+
+PartitionSpec: PARTITION BY part_strategy '(' part_params ')'
+               {
+                   PartitionSpec *n = makeNode(PartitionSpec);
+
+                   n->strategy = $3;           /* kept as a string; transformCreateStmt compares it to "list" case-insensitively */
+                   n->partParams = $5;         /* list of PartitionElem */
+                   n->location = @1;           /* position of the PARTITION keyword */
+
+                   $$ = n;
+               }
+       ;
+
+part_strategy: IDENT                   { $$ = $1; }     /* strategy name is accepted as free text here, not validated */
+               | unreserved_keyword    { $$ = pstrdup($1); }    /* copied; presumably keyword text is a shared constant -- confirm */
+       ;
+
+part_params:   part_elem                       { $$ = list_make1($1); }     /* non-empty, comma-separated list of part_elem */
+           | part_params ',' part_elem         { $$ = lappend($1, $3); }
+       ;
+
+part_elem: ColId opt_collate opt_class     /* key is a plain column name */
+               {
+                   PartitionElem *n = makeNode(PartitionElem);
+
+                   n->name = $1;
+                   n->expr = NULL;             /* no expression for a column key */
+                   n->collation = $2;
+                   n->opclass = $3;
+                   n->location = @1;
+                   $$ = n;
+               }
+           | func_expr_windowless opt_collate opt_class    /* key is an expression of the func_expr_windowless form */
+               {
+                   PartitionElem *n = makeNode(PartitionElem);
+
+                   n->name = NULL;             /* no column name for an expression key */
+                   n->expr = $1;
+                   n->collation = $2;
+                   n->opclass = $3;
+                   n->location = @1;
+                   $$ = n;
+               }
+           | '(' a_expr ')' opt_collate opt_class          /* key is a parenthesized expression */
+               {
+                   PartitionElem *n = makeNode(PartitionElem);
+
+                   n->name = NULL;
+                   n->expr = $2;               /* the expression inside the parentheses */
+                   n->collation = $4;
+                   n->opclass = $5;
+                   n->location = @1;           /* position of the opening parenthesis */
+                   $$ = n;
+               }
+       ;
 /* WITH (options) is preferred, WITH OIDS and WITHOUT OIDS are legacy forms */
 OptWith:
            WITH reloptions             { $$ = $2; }
@@ -4484,6 +4753,48 @@ CreateForeignTableStmt:
                    n->options = $14;
                    $$ = (Node *) n;
                }
+       | CREATE FOREIGN TABLE qualified_name
+           PARTITION OF qualified_name OptPartitionElementList ForValues
+           SERVER name create_generic_options
+               {
+                   CreateForeignTableStmt *n = makeNode(CreateForeignTableStmt);
+                   $4->relpersistence = RELPERSISTENCE_PERMANENT;
+                   n->base.relation = $4;
+                   n->base.inhRelations = list_make1($7);
+                   n->base.tableElts = $8;
+                   n->base.partbound = (Node *) $9;
+                   n->base.ofTypename = NULL;
+                   n->base.constraints = NIL;
+                   n->base.options = NIL;
+                   n->base.oncommit = ONCOMMIT_NOOP;
+                   n->base.tablespacename = NULL;
+                   n->base.if_not_exists = false;
+                   /* FDW-specific data */
+                   n->servername = $11;
+                   n->options = $12;
+                   $$ = (Node *) n;
+               }
+       | CREATE FOREIGN TABLE IF_P NOT EXISTS qualified_name
+           PARTITION OF qualified_name OptPartitionElementList ForValues
+           SERVER name create_generic_options
+               {
+                   CreateForeignTableStmt *n = makeNode(CreateForeignTableStmt);
+                   $7->relpersistence = RELPERSISTENCE_PERMANENT;
+                   n->base.relation = $7;
+                   n->base.inhRelations = list_make1($10);
+                   n->base.tableElts = $11;
+                   n->base.partbound = (Node *) $12;
+                   n->base.ofTypename = NULL;
+                   n->base.constraints = NIL;
+                   n->base.options = NIL;
+                   n->base.oncommit = ONCOMMIT_NOOP;
+                   n->base.tablespacename = NULL;
+                   n->base.if_not_exists = true;
+                   /* FDW-specific data */
+                   n->servername = $14;
+                   n->options = $15;
+                   $$ = (Node *) n;
+               }
        ;
 
 /*****************************************************************************
@@ -13703,6 +14014,7 @@ unreserved_keyword:
            | ASSERTION
            | ASSIGNMENT
            | AT
+           | ATTACH
            | ATTRIBUTE
            | BACKWARD
            | BEFORE
@@ -13749,6 +14061,7 @@ unreserved_keyword:
            | DELIMITER
            | DELIMITERS
            | DEPENDS
+           | DETACH
            | DICTIONARY
            | DISABLE_P
            | DISCARD
index 481a4ddc4847d49d73bef4181a891210248e7721..92d1577030c79f6728f14125bc38696b931fbc8c 100644 (file)
@@ -501,6 +501,13 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
                err = _("grouping operations are not allowed in trigger WHEN conditions");
 
            break;
+       case EXPR_KIND_PARTITION_EXPRESSION:
+           if (isAgg)
+               err = _("aggregate functions are not allowed in partition key expression");
+           else
+               err = _("grouping operations are not allowed in partition key expression");
+
+           break;
 
            /*
             * There is intentionally no default: case here, so that the
@@ -858,6 +865,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
        case EXPR_KIND_TRIGGER_WHEN:
            err = _("window functions are not allowed in trigger WHEN conditions");
            break;
+       case EXPR_KIND_PARTITION_EXPRESSION:
+           err = _("window functions are not allowed in partition key expression");
+           break;
 
            /*
             * There is intentionally no default: case here, so that the
index 17d1cbf8b328d4fdb7f23ab0f54659ac84ebe6de..8a2bdf06e8de64cac1e07ee1806fd2e4ee6dc898 100644 (file)
@@ -1843,6 +1843,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
        case EXPR_KIND_TRIGGER_WHEN:
            err = _("cannot use subquery in trigger WHEN condition");
            break;
+       case EXPR_KIND_PARTITION_EXPRESSION:
+           err = _("cannot use subquery in partition key expression");
+           break;
 
            /*
             * There is intentionally no default: case here, so that the
@@ -3446,6 +3449,8 @@ ParseExprKindName(ParseExprKind exprKind)
            return "EXECUTE";
        case EXPR_KIND_TRIGGER_WHEN:
            return "WHEN";
+       case EXPR_KIND_PARTITION_EXPRESSION:
+           return "PARTITION BY";
 
            /*
             * There is intentionally no default: case here, so that the
index 56c9a4293df79a2eab6e053075dd3e6d8ce177a8..7d9b4157d4dd3906d26de06c779bfa3518a98572 100644 (file)
@@ -2166,6 +2166,9 @@ check_srf_call_placement(ParseState *pstate, int location)
        case EXPR_KIND_TRIGGER_WHEN:
            err = _("set-returning functions are not allowed in trigger WHEN conditions");
            break;
+       case EXPR_KIND_PARTITION_EXPRESSION:
+           err = _("set-returning functions are not allowed in partition key expression");
+           break;
 
            /*
             * There is intentionally no default: case here, so that the
index 0670bc24822a44a8bd8d38a59a9dee1f6c430c17..cc6a961bb468b24f7e32eff25fd81f721a6ea72f 100644 (file)
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "optimizer/planner.h"
 #include "parser/analyze.h"
 #include "parser/parse_clause.h"
+#include "parser/parse_coerce.h"
 #include "parser/parse_collate.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_relation.h"
@@ -62,6 +64,7 @@
 #include "utils/guc.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
+#include "utils/ruleutils.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
 
@@ -87,6 +90,8 @@ typedef struct
    List       *alist;          /* "after list" of things to do after creating
                                 * the table */
    IndexStmt  *pkey;           /* PRIMARY KEY index, if any */
+   bool        ispartitioned;  /* true if table is partitioned */
+   Node       *partbound;      /* transformed FOR VALUES */
 } CreateStmtContext;
 
 /* State shared by transformCreateSchemaStmt and its subroutines */
@@ -129,6 +134,7 @@ static void transformConstraintAttrs(CreateStmtContext *cxt,
                         List *constraintList);
 static void transformColumnType(CreateStmtContext *cxt, ColumnDef *column);
 static void setSchemaName(char *context_schema, char **stmt_schema_name);
+static void transformAttachPartition(CreateStmtContext *cxt, PartitionCmd *cmd);
 
 
 /*
@@ -229,6 +235,7 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString)
    cxt.blist = NIL;
    cxt.alist = NIL;
    cxt.pkey = NULL;
+   cxt.ispartitioned = stmt->partspec != NULL;
 
    /*
     * Notice that we allow OIDs here only for plain tables, even though
@@ -247,6 +254,28 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString)
    if (stmt->ofTypename)
        transformOfType(&cxt, stmt->ofTypename);
 
+   if (stmt->partspec)
+   {
+       int         partnatts = list_length(stmt->partspec->partParams);
+
+       if (stmt->inhRelations && !stmt->partbound)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+           errmsg("cannot create partitioned table as inheritance child")));
+
+       if (partnatts > PARTITION_MAX_KEYS)
+           ereport(ERROR,
+                   (errcode(ERRCODE_TOO_MANY_COLUMNS),
+                    errmsg("cannot partition using more than %d columns",
+                           PARTITION_MAX_KEYS)));
+
+       if (!pg_strcasecmp(stmt->partspec->strategy, "list") &&
+           partnatts > 1)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+               errmsg("cannot list partition using more than one column")));
+   }
+
    /*
     * Run through each primary element in the table creation clause. Separate
     * column defs from constraints, and do preliminary analysis.  We have to
@@ -583,6 +612,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column)
                             errmsg("primary key constraints are not supported on foreign tables"),
                             parser_errposition(cxt->pstate,
                                                constraint->location)));
+               if (cxt->ispartitioned)
+                   ereport(ERROR,
+                           (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                            errmsg("primary key constraints are not supported on partitioned tables"),
+                            parser_errposition(cxt->pstate,
+                                               constraint->location)));
                /* FALL THRU */
 
            case CONSTR_UNIQUE:
@@ -592,6 +627,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column)
                             errmsg("unique constraints are not supported on foreign tables"),
                             parser_errposition(cxt->pstate,
                                                constraint->location)));
+               if (cxt->ispartitioned)
+                   ereport(ERROR,
+                           (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                            errmsg("unique constraints are not supported on partitioned tables"),
+                            parser_errposition(cxt->pstate,
+                                               constraint->location)));
                if (constraint->keys == NIL)
                    constraint->keys = list_make1(makeString(column->colname));
                cxt->ixconstraints = lappend(cxt->ixconstraints, constraint);
@@ -609,6 +650,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column)
                             errmsg("foreign key constraints are not supported on foreign tables"),
                             parser_errposition(cxt->pstate,
                                                constraint->location)));
+               if (cxt->ispartitioned)
+                   ereport(ERROR,
+                           (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                            errmsg("foreign key constraints are not supported on partitioned tables"),
+                            parser_errposition(cxt->pstate,
+                                               constraint->location)));
 
                /*
                 * Fill in the current attribute's name and throw it into the
@@ -674,6 +721,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint)
                         errmsg("primary key constraints are not supported on foreign tables"),
                         parser_errposition(cxt->pstate,
                                            constraint->location)));
+           if (cxt->ispartitioned)
+               ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("primary key constraints are not supported on partitioned tables"),
+                        parser_errposition(cxt->pstate,
+                                           constraint->location)));
            cxt->ixconstraints = lappend(cxt->ixconstraints, constraint);
            break;
 
@@ -684,6 +737,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint)
                         errmsg("unique constraints are not supported on foreign tables"),
                         parser_errposition(cxt->pstate,
                                            constraint->location)));
+           if (cxt->ispartitioned)
+               ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("unique constraints are not supported on partitioned tables"),
+                        parser_errposition(cxt->pstate,
+                                           constraint->location)));
            cxt->ixconstraints = lappend(cxt->ixconstraints, constraint);
            break;
 
@@ -694,6 +753,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint)
                         errmsg("exclusion constraints are not supported on foreign tables"),
                         parser_errposition(cxt->pstate,
                                            constraint->location)));
+           if (cxt->ispartitioned)
+               ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("exclusion constraints are not supported on partitioned tables"),
+                        parser_errposition(cxt->pstate,
+                                           constraint->location)));
            cxt->ixconstraints = lappend(cxt->ixconstraints, constraint);
            break;
 
@@ -708,6 +773,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint)
                         errmsg("foreign key constraints are not supported on foreign tables"),
                         parser_errposition(cxt->pstate,
                                            constraint->location)));
+           if (cxt->ispartitioned)
+               ereport(ERROR,
+                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                        errmsg("foreign key constraints are not supported on partitioned tables"),
+                        parser_errposition(cxt->pstate,
+                                           constraint->location)));
            cxt->fkconstraints = lappend(cxt->fkconstraints, constraint);
            break;
 
@@ -763,7 +834,8 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla
        relation->rd_rel->relkind != RELKIND_VIEW &&
        relation->rd_rel->relkind != RELKIND_MATVIEW &&
        relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
-       relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+       relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+       relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table",
@@ -1854,7 +1926,8 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt)
                rel = heap_openrv(inh, AccessShareLock);
                /* check user requested inheritance from valid relkind */
                if (rel->rd_rel->relkind != RELKIND_RELATION &&
-                   rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+                   rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+                   rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
                    ereport(ERROR,
                            (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                             errmsg("inherited relation \"%s\" is not a table or foreign table",
@@ -2512,6 +2585,8 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt,
    cxt.blist = NIL;
    cxt.alist = NIL;
    cxt.pkey = NULL;
+   cxt.ispartitioned = (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+   cxt.partbound = NULL;
 
    /*
     * The only subtypes that currently require parse transformation handling
@@ -2594,6 +2669,19 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt,
                    break;
                }
 
+           case AT_AttachPartition:
+               {
+                   PartitionCmd *partcmd = (PartitionCmd *) cmd->def;
+
+                   transformAttachPartition(&cxt, partcmd);
+
+                   /* assign transformed values */
+                   partcmd->bound = cxt.partbound;
+               }
+
+               newcmds = lappend(newcmds, cmd);
+               break;
+
            default:
                newcmds = lappend(newcmds, cmd);
                break;
@@ -2958,3 +3046,237 @@ setSchemaName(char *context_schema, char **stmt_schema_name)
                        "different from the one being created (%s)",
                        *stmt_schema_name, context_schema)));
 }
+
+/*
+ * transformAttachPartition
+ *     Parse analysis for ALTER TABLE ... ATTACH PARTITION ... FOR VALUES ...
+ *
+ * Verifies that the relation being altered (cxt->rel) is a partitioned
+ * table and transforms cmd->bound against its partition key.  The result
+ * is left in cxt->partbound; installing it into the command is up to the
+ * caller.
+ */
+static void
+transformAttachPartition(CreateStmtContext *cxt, PartitionCmd *cmd)
+{
+   Relation    parent = cxt->rel;
+   bool        is_partitioned;
+
+   /*
+    * The bound specification is validated against the parent's partition
+    * key, so the parent had better be a partitioned table.
+    */
+   is_partitioned = (parent->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+   if (!is_partitioned)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                errmsg("\"%s\" is not partitioned",
+                       RelationGetRelationName(parent))));
+
+   /* A partitioned table is expected to have its key loaded by now */
+   Assert(RelationGetPartitionKey(parent) != NULL);
+
+   /* Transform the bound values */
+   cxt->partbound = transformPartitionBound(cxt->pstate, parent, cmd->bound);
+}
+
+/*
+ * transformRangeBoundValue
+ *     Transform one finite range bound value for partition key column keyno
+ *
+ * datum holds the raw A_Const to transform; colname is the name (or
+ * deparsed expression) of the key column, used only in error messages.
+ * NULL is rejected, the value is coerced to the key column's type and
+ * typmod, and the result is simplified.  datum itself is not modified;
+ * the transformed expression is returned.
+ */
+static Node *
+transformRangeBoundValue(ParseState *pstate, PartitionKey key, int keyno,
+                        const char *colname, PartitionRangeDatum *datum)
+{
+   A_Const    *con = (A_Const *) datum->value;
+   Node       *value;
+
+   value = (Node *) make_const(pstate, &con->val, con->location);
+   if (((Const *) value)->constisnull)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                errmsg("cannot specify NULL in range bound")));
+
+   value = coerce_to_target_type(pstate,
+                                 value, exprType(value),
+                                 get_partition_col_typid(key, keyno),
+                                 get_partition_col_typmod(key, keyno),
+                                 COERCION_ASSIGNMENT,
+                                 COERCE_IMPLICIT_CAST,
+                                 -1);
+   if (value == NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_DATATYPE_MISMATCH),
+                errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"",
+                       format_type_be(get_partition_col_typid(key, keyno)),
+                       colname),
+                parser_errposition(pstate, exprLocation((Node *) datum))));
+
+   /* Simplify the expression */
+   return (Node *) expression_planner((Expr *) value);
+}
+
+/*
+ * transformPartitionBound
+ *
+ * Transform a partition bound specification against parent's partition key.
+ *
+ * bound is a raw PartitionBoundSpec whose strategy must match that of the
+ * parent's key:
+ *
+ * - LIST: every listed value is coerced to the type of the (single) key
+ *   column and simplified; duplicates are dropped from the result.
+ * - RANGE: the FROM and TO lists must each contain exactly one entry per
+ *   key column.  Finite entries must be non-NULL and are coerced to the
+ *   matching column's type and simplified; infinite entries are carried
+ *   over as they are.
+ *
+ * The result is a new PartitionBoundSpec.  The input tree is not modified,
+ * so it is safe to transform the same raw specification more than once.
+ */
+Node *
+transformPartitionBound(ParseState *pstate, Relation parent, Node *bound)
+{
+   PartitionBoundSpec *spec = (PartitionBoundSpec *) bound,
+              *result_spec;
+   PartitionKey key = RelationGetPartitionKey(parent);
+   char        strategy = get_partition_strategy(key);
+   int         partnatts = get_partition_natts(key);
+   List       *partexprs = get_partition_exprs(key);
+
+   result_spec = copyObject(spec);
+
+   if (strategy == PARTITION_STRATEGY_LIST)
+   {
+       ListCell   *cell;
+       char       *colname;
+
+       /* Get the only column's name in case we need to output an error */
+       if (key->partattrs[0] != 0)
+           colname = get_relid_attribute_name(RelationGetRelid(parent),
+                                              key->partattrs[0]);
+       else
+           colname = deparse_expression((Node *) linitial(partexprs),
+                        deparse_context_for(RelationGetRelationName(parent),
+                                            RelationGetRelid(parent)),
+                                        false, false);
+
+       if (spec->strategy != PARTITION_STRATEGY_LIST)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                 errmsg("invalid bound specification for a list partition"),
+                    parser_errposition(pstate, exprLocation(bound))));
+
+       /* Rebuild the value list from scratch, reading from the input */
+       result_spec->listdatums = NIL;
+       foreach(cell, spec->listdatums)
+       {
+           A_Const    *con = (A_Const *) lfirst(cell);
+           Node       *value;
+           ListCell   *cell2;
+           bool        duplicate = false;
+
+           value = (Node *) make_const(pstate, &con->val, con->location);
+           value = coerce_to_target_type(pstate,
+                                         value, exprType(value),
+                                         get_partition_col_typid(key, 0),
+                                         get_partition_col_typmod(key, 0),
+                                         COERCION_ASSIGNMENT,
+                                         COERCE_IMPLICIT_CAST,
+                                         -1);
+
+           if (value == NULL)
+               ereport(ERROR,
+                       (errcode(ERRCODE_DATATYPE_MISMATCH),
+                        errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"",
+                            format_type_be(get_partition_col_typid(key, 0)),
+                               colname),
+                        parser_errposition(pstate,
+                                           exprLocation((Node *) con))));
+
+           /* Simplify the expression */
+           value = (Node *) expression_planner((Expr *) value);
+
+           /* Don't add to the result if the value is a duplicate */
+           foreach(cell2, result_spec->listdatums)
+           {
+               if (equal(value, lfirst(cell2)))
+               {
+                   duplicate = true;
+                   break;
+               }
+           }
+           if (duplicate)
+               continue;
+
+           result_spec->listdatums = lappend(result_spec->listdatums,
+                                             value);
+       }
+   }
+   else if (strategy == PARTITION_STRATEGY_RANGE)
+   {
+       ListCell   *cell1,
+                  *cell2;
+       int         i,
+                   j;
+       char       *colname;
+
+       if (spec->strategy != PARTITION_STRATEGY_RANGE)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                errmsg("invalid bound specification for a range partition"),
+                    parser_errposition(pstate, exprLocation(bound))));
+
+       Assert(spec->lowerdatums != NIL && spec->upperdatums != NIL);
+
+       if (list_length(spec->lowerdatums) != partnatts)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                    errmsg("FROM must specify exactly one value per partitioning column")));
+       if (list_length(spec->upperdatums) != partnatts)
+           ereport(ERROR,
+                   (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                    errmsg("TO must specify exactly one value per partitioning column")));
+
+       /* i walks the key columns, j walks the expression columns only */
+       i = j = 0;
+       result_spec->lowerdatums = result_spec->upperdatums = NIL;
+       forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
+       {
+           PartitionRangeDatum *ldatum,
+                      *rdatum;
+
+           /*
+            * Work on copies: storing the transformed values into the
+            * caller's nodes would scribble on the input parse tree.
+            */
+           ldatum = (PartitionRangeDatum *) copyObject(lfirst(cell1));
+           rdatum = (PartitionRangeDatum *) copyObject(lfirst(cell2));
+
+           /* Get the column's name in case we need to output an error */
+           if (key->partattrs[i] != 0)
+               colname = get_relid_attribute_name(RelationGetRelid(parent),
+                                                  key->partattrs[i]);
+           else
+           {
+               colname = deparse_expression((Node *) list_nth(partexprs, j),
+                        deparse_context_for(RelationGetRelationName(parent),
+                                            RelationGetRelid(parent)),
+                                            false, false);
+               ++j;
+           }
+
+           /* Infinite (UNBOUNDED) entries carry no value to transform */
+           if (!ldatum->infinite && ldatum->value != NULL)
+               ldatum->value = transformRangeBoundValue(pstate, key, i,
+                                                        colname, ldatum);
+           if (!rdatum->infinite && rdatum->value != NULL)
+               rdatum->value = transformRangeBoundValue(pstate, key, i,
+                                                        colname, rdatum);
+
+           result_spec->lowerdatums = lappend(result_spec->lowerdatums,
+                                              ldatum);
+           result_spec->upperdatums = lappend(result_spec->upperdatums,
+                                              rdatum);
+
+           ++i;
+       }
+   }
+   else
+       elog(ERROR, "unexpected partition strategy: %d", (int) strategy);
+
+   return (Node *) result_spec;
+}
index f82d891c347297a117e2c52e54406ccb8a584065..32e132814971eb87a910061937c812fd51935005 100644 (file)
@@ -261,7 +261,8 @@ DefineQueryRewrite(char *rulename,
     */
    if (event_relation->rd_rel->relkind != RELKIND_RELATION &&
        event_relation->rd_rel->relkind != RELKIND_MATVIEW &&
-       event_relation->rd_rel->relkind != RELKIND_VIEW)
+       event_relation->rd_rel->relkind != RELKIND_VIEW &&
+       event_relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table or view",
index 65c3d6e081405c42153b2064fec01b85bfe53bc7..bf4f098c153aa7521af94b5da1595038b2310d99 100644 (file)
@@ -1231,7 +1231,8 @@ rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte,
    TargetEntry *tle;
 
    if (target_relation->rd_rel->relkind == RELKIND_RELATION ||
-       target_relation->rd_rel->relkind == RELKIND_MATVIEW)
+       target_relation->rd_rel->relkind == RELKIND_MATVIEW ||
+       target_relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        /*
         * Emit CTID so that executor can find the row to update or delete.
index b7edefc7ddfdd8caacfb7956e99965db67cc1f2f..e38586dd80baf573ba460b452db73a4ca31340f9 100644 (file)
@@ -121,7 +121,8 @@ get_row_security_policies(Query *root, RangeTblEntry *rte, int rt_index,
    *hasSubLinks = false;
 
    /* If this is not a normal relation, just return immediately */
-   if (rte->relkind != RELKIND_RELATION)
+   if (rte->relkind != RELKIND_RELATION &&
+       rte->relkind != RELKIND_PARTITIONED_TABLE)
        return;
 
    /* Switch to checkAsUser if it's set */
index f50ce408ae6dc47ec9689fd32c73daa0dafae315..fd4eff490744b584b57d19abe124a2b2c6f7a7ec 100644 (file)
@@ -987,7 +987,8 @@ ProcessUtilitySlow(ParseState *pstate,
                            /* Create the table itself */
                            address = DefineRelation((CreateStmt *) stmt,
                                                     RELKIND_RELATION,
-                                                    InvalidOid, NULL);
+                                                    InvalidOid, NULL,
+                                                    queryString);
                            EventTriggerCollectSimpleCommand(address,
                                                             secondaryObject,
                                                             stmt);
@@ -1020,7 +1021,8 @@ ProcessUtilitySlow(ParseState *pstate,
                            /* Create the table itself */
                            address = DefineRelation((CreateStmt *) stmt,
                                                     RELKIND_FOREIGN_TABLE,
-                                                    InvalidOid, NULL);
+                                                    InvalidOid, NULL,
+                                                    queryString);
                            CreateForeignTable((CreateForeignTableStmt *) stmt,
                                               address.objectId);
                            EventTriggerCollectSimpleCommand(address,
index fecee85e5bae20a27fb365ecf1eab499e71674ca..4e2ba19d1b77814b7ee378d024eec29a94616018 100644 (file)
@@ -33,6 +33,7 @@
 #include "catalog/pg_language.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_operator.h"
+#include "catalog/pg_partitioned_table.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_trigger.h"
 #include "catalog/pg_type.h"
@@ -315,6 +316,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
                       const Oid *excludeOps,
                       bool attrsOnly, bool showTblSpc,
                       int prettyFlags, bool missing_ok);
+static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags);
 static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
                            int prettyFlags, bool missing_ok);
 static text *pg_get_expr_worker(text *expr, Oid relid, const char *relname,
@@ -1415,6 +1417,163 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
    return buf.data;
 }
 
+/*
+ * pg_get_partkeydef
+ *
+ * SQL-callable function returning, as text, the partition key
+ * specification of the relation whose OID is the first argument.  The
+ * text has the following form (note there is no leading PARTITION BY):
+ *
+ * { RANGE | LIST } (column opt_collation opt_opclass [, ...])
+ */
+Datum
+pg_get_partkeydef(PG_FUNCTION_ARGS)
+{
+   Oid         relid = PG_GETARG_OID(0);
+   char       *keydef;
+
+   keydef = pg_get_partkeydef_worker(relid, PRETTYFLAG_INDENT);
+
+   PG_RETURN_TEXT_P(string_to_text(keydef));
+}
+
+/*
+ * Internal workhorse to decompile a partition key definition.
+ *
+ * Fetches relid's pg_partitioned_table row from the syscache and builds
+ *
+ *     { RANGE | LIST } (key [COLLATE collation] [opclass] [, ...])
+ *
+ * where each key is either a quoted column name or a deparsed expression.
+ * The COLLATE clause is printed only when the key's collation is valid and
+ * differs from the collation of the column (or expression) itself.
+ * prettyFlags is passed on to the expression deparser.
+ *
+ * Raises an error if no partition key is found for relid.  The returned
+ * string is the buffer of a freshly initialized StringInfo.
+ */
+static char *
+pg_get_partkeydef_worker(Oid relid, int prettyFlags)
+{
+   Form_pg_partitioned_table   form;
+   HeapTuple   tuple;
+   oidvector  *partclass;
+   oidvector  *partcollation;
+   List       *partexprs;
+   ListCell   *partexpr_item;
+   List       *context;
+   Datum       datum;
+   bool        isnull;
+   StringInfoData buf;
+   int         keyno;
+   char       *sep;
+
+   tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid));
+   if (!HeapTupleIsValid(tuple))
+       elog(ERROR, "cache lookup failed for partition key of %u", relid);
+
+   form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
+
+   Assert(form->partrelid == relid);
+
+   /* Must get partclass and partcollation the hard way */
+   datum = SysCacheGetAttr(PARTRELID, tuple,
+                           Anum_pg_partitioned_table_partclass, &isnull);
+   Assert(!isnull);
+   partclass = (oidvector *) DatumGetPointer(datum);
+
+   datum = SysCacheGetAttr(PARTRELID, tuple,
+                           Anum_pg_partitioned_table_partcollation, &isnull);
+   Assert(!isnull);
+   partcollation = (oidvector *) DatumGetPointer(datum);
+
+   /*
+    * Get the expressions, if any.  (NOTE: we do not use the relcache
+    * versions of the expressions, because we want to display non-const-folded
+    * expressions.)  A single lookup tells us both whether the column is
+    * null and what it contains.
+    */
+   datum = SysCacheGetAttr(PARTRELID, tuple,
+                           Anum_pg_partitioned_table_partexprs, &isnull);
+   if (!isnull)
+   {
+       char       *exprsString = TextDatumGetCString(datum);
+
+       partexprs = (List *) stringToNode(exprsString);
+
+       if (!IsA(partexprs, List))
+           elog(ERROR, "unexpected node type found in partexprs: %d",
+                       (int) nodeTag(partexprs));
+
+       pfree(exprsString);
+   }
+   else
+       partexprs = NIL;
+
+   partexpr_item = list_head(partexprs);
+   context = deparse_context_for(get_relation_name(relid), relid);
+
+   initStringInfo(&buf);
+
+   switch (form->partstrat)
+   {
+       case PARTITION_STRATEGY_LIST:
+           appendStringInfoString(&buf, "LIST");
+           break;
+       case PARTITION_STRATEGY_RANGE:
+           appendStringInfoString(&buf, "RANGE");
+           break;
+       default:
+           elog(ERROR, "unexpected partition strategy: %d",
+                       (int) form->partstrat);
+   }
+
+   appendStringInfoString(&buf, " (");
+   sep = "";
+   for (keyno = 0; keyno < form->partnatts; keyno++)
+   {
+       AttrNumber  attnum = form->partattrs.values[keyno];
+       Oid         keycoltype;
+       Oid         keycolcollation;
+       Oid         partcoll;
+
+       appendStringInfoString(&buf, sep);
+       sep = ", ";
+       if (attnum != 0)
+       {
+           /* Simple attribute reference */
+           char       *attname;
+           int32       keycoltypmod;
+
+           attname = get_relid_attribute_name(relid, attnum);
+           appendStringInfoString(&buf, quote_identifier(attname));
+           get_atttypetypmodcoll(relid, attnum,
+                                 &keycoltype, &keycoltypmod,
+                                 &keycolcollation);
+       }
+       else
+       {
+           /* Expression: consume the next entry of the partexprs list */
+           Node       *partkey;
+           char       *str;
+
+           if (partexpr_item == NULL)
+               elog(ERROR, "too few entries in partexprs list");
+           partkey = (Node *) lfirst(partexpr_item);
+           partexpr_item = lnext(partexpr_item);
+
+           /* Deparse, honoring the caller's pretty-printing flags */
+           str = deparse_expression_pretty(partkey, context, false, false,
+                                           prettyFlags, 0);
+
+           /*
+            * NOTE(review): the expression is emitted without enclosing
+            * parentheses; confirm the result can be re-parsed as a
+            * partition key.
+            */
+           appendStringInfoString(&buf, str);
+           keycoltype = exprType(partkey);
+           keycolcollation = exprCollation(partkey);
+       }
+
+       /* Add collation, if not default for column */
+       partcoll = partcollation->values[keyno];
+       if (OidIsValid(partcoll) && partcoll != keycolcollation)
+           appendStringInfo(&buf, " COLLATE %s",
+                            generate_collation_name(partcoll));
+
+       /* Add the operator class name, if not default */
+       get_opclass_name(partclass->values[keyno], keycoltype, &buf);
+   }
+   appendStringInfoChar(&buf, ')');
+
+   /* Clean up; partclass and partcollation point into the tuple */
+   ReleaseSysCache(tuple);
+
+   return buf.data;
+}
 
 /*
  * pg_get_constraintdef
@@ -8291,6 +8450,88 @@ get_rule_expr(Node *node, deparse_context *context,
            }
            break;
 
+       case T_PartitionBoundSpec:
+           {
+               PartitionBoundSpec *spec = (PartitionBoundSpec *) node;
+               ListCell *cell;
+               char     *sep;
+
+               switch (spec->strategy)
+               {
+                   case PARTITION_STRATEGY_LIST:
+                       Assert(spec->listdatums != NIL);
+
+                       appendStringInfoString(buf, "FOR VALUES");
+                       appendStringInfoString(buf, " IN (");
+                       sep = "";
+                       foreach (cell, spec->listdatums)
+                       {
+                           Const *val = lfirst(cell);
+
+                           appendStringInfoString(buf, sep);
+                           get_const_expr(val, context, -1);
+                           sep = ", ";
+                       }
+
+                       appendStringInfoString(buf, ")");
+                       break;
+
+                   case PARTITION_STRATEGY_RANGE:
+                       Assert(spec->lowerdatums != NIL &&
+                              spec->upperdatums != NIL &&
+                              list_length(spec->lowerdatums) ==
+                              list_length(spec->upperdatums));
+
+                       appendStringInfoString(buf, "FOR VALUES");
+                       appendStringInfoString(buf, " FROM");
+                       appendStringInfoString(buf, " (");
+                       sep = "";
+                       foreach (cell, spec->lowerdatums)
+                       {
+                           PartitionRangeDatum *datum = lfirst(cell);
+                           Const *val;
+
+                           appendStringInfoString(buf, sep);
+                           if (datum->infinite)
+                               appendStringInfoString(buf, "UNBOUNDED");
+                           else
+                           {
+                               val = (Const *) datum->value;
+                               get_const_expr(val, context, -1);
+                           }
+                           sep = ", ";
+                       }
+                       appendStringInfoString(buf, ")");
+
+                       appendStringInfoString(buf, " TO");
+                       appendStringInfoString(buf, " (");
+                       sep = "";
+                       foreach (cell, spec->upperdatums)
+                       {
+                           PartitionRangeDatum *datum = lfirst(cell);
+                           Const *val;
+
+                           appendStringInfoString(buf, sep);
+                           if (datum->infinite)
+                               appendStringInfoString(buf, "UNBOUNDED");
+                           else
+                           {
+                               val = (Const *) datum->value;
+                               get_const_expr(val, context, -1);
+                           }
+                           sep = ", ";
+                       }
+                       appendStringInfoString(buf, ")");
+                       break;
+
+                   default:
+                       elog(ERROR, "unrecognized partition strategy: %d",
+                            (int) spec->strategy);
+                       break;
+               }
+           }
+           break;
+
        case T_List:
            {
                char       *sep;
index 79e0b1ff483008a5dd7a9f302feb4af461b4ee93..2a6835991c38d1265a908bc224a45b5e458af0cb 100644 (file)
@@ -32,6 +32,7 @@
 
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/nbtree.h"
 #include "access/reloptions.h"
 #include "access/sysattr.h"
 #include "access/xact.h"
@@ -40,6 +41,7 @@
 #include "catalog/index.h"
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
+#include "catalog/partition.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_amproc.h"
 #include "catalog/pg_attrdef.h"
@@ -49,6 +51,7 @@
 #include "catalog/pg_database.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
+#include "catalog/pg_partitioned_table.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_rewrite.h"
 #include "catalog/pg_shseclabel.h"
@@ -258,6 +261,8 @@ static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_hi
 static Relation AllocateRelationDesc(Form_pg_class relp);
 static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
 static void RelationBuildTupleDesc(Relation relation);
+static void RelationBuildPartitionKey(Relation relation);
+static PartitionKey copy_partition_key(PartitionKey fromkey);
 static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
 static void RelationInitPhysicalAddr(Relation relation);
 static void load_critical_index(Oid indexoid, Oid heapoid);
@@ -278,6 +283,8 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
                  StrategyNumber numSupport);
 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
 static void unlink_initfile(const char *initfilename);
+static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
+                   PartitionDesc partdesc2);
 
 
 /*
@@ -435,6 +442,7 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
        case RELKIND_INDEX:
        case RELKIND_VIEW:
        case RELKIND_MATVIEW:
+       case RELKIND_PARTITIONED_TABLE:
            break;
        default:
            return;
@@ -795,6 +803,237 @@ RelationBuildRuleLock(Relation relation)
    relation->rd_rules = rulelock;
 }
 
+/*
+ * RelationBuildPartitionKey
+ *     Build and attach to relcache partition key data of relation
+ *
+ * Looks up the relation's pg_partitioned_table row through the PARTRELID
+ * syscache and derives from it a PartitionKeyData: the strategy, the number
+ * of key columns, and, per key column, the attribute number, operator
+ * family, opclass input type, btree comparison support function, collation
+ * and type information.  If no such row exists, the function returns
+ * without setting rd_partkey or rd_partkeycxt.
+ *
+ * Partitioning key data is stored in CacheMemoryContext to ensure it survives
+ * as long as the relcache.  To avoid leaking memory in that context in case
+ * of an error partway through this function, we build the structure in the
+ * working context (which must be short-lived) and copy the completed
+ * structure into the cache memory.
+ *
+ * Also, since the structure being created here is sufficiently complex, we
+ * make a private child context of CacheMemoryContext for each relation that
+ * has associated partition key information.  That means no complicated logic
+ * to free individual elements whenever the relcache entry is flushed - just
+ * delete the context.
+ */
+static void
+RelationBuildPartitionKey(Relation relation)
+{
+   Form_pg_partitioned_table form;
+   HeapTuple   tuple;
+   bool        isnull;
+   int         i;
+   PartitionKey key;
+   AttrNumber *attrs;
+   oidvector  *opclass;
+   oidvector  *collation;
+   ListCell   *partexprs_item;
+   Datum       datum;
+   MemoryContext partkeycxt,
+               oldcxt;
+
+   tuple = SearchSysCache1(PARTRELID,
+                           ObjectIdGetDatum(RelationGetRelid(relation)));
+
+   /*
+    * The following happens when we have created our pg_class entry but not
+    * the pg_partitioned_table entry yet.
+    */
+   if (!HeapTupleIsValid(tuple))
+       return;
+
+   key = (PartitionKey) palloc0(sizeof(PartitionKeyData));
+
+   /* Fixed-length attributes */
+   form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
+   key->strategy = form->partstrat;
+   key->partnatts = form->partnatts;
+
+   /*
+    * We can rely on the first variable-length attribute being mapped to the
+    * relevant field of the catalog's C struct, because all previous
+    * attributes are non-nullable and fixed-length.
+    */
+   attrs = form->partattrs.values;
+
+   /* But use the hard way to retrieve further variable-length attributes */
+   /* Operator class */
+   datum = SysCacheGetAttr(PARTRELID, tuple,
+                           Anum_pg_partitioned_table_partclass, &isnull);
+   Assert(!isnull);
+   opclass = (oidvector *) DatumGetPointer(datum);
+
+   /* Collation */
+   datum = SysCacheGetAttr(PARTRELID, tuple,
+                           Anum_pg_partitioned_table_partcollation, &isnull);
+   Assert(!isnull);
+   collation = (oidvector *) DatumGetPointer(datum);
+
+   /* Expressions (NULL when every key column is a plain attribute) */
+   datum = SysCacheGetAttr(PARTRELID, tuple,
+                           Anum_pg_partitioned_table_partexprs, &isnull);
+   if (!isnull)
+   {
+       char       *exprString;
+       Node       *expr;
+
+       exprString = TextDatumGetCString(datum);
+       expr = stringToNode(exprString);
+       pfree(exprString);
+
+       /*
+        * Run the expressions through const-simplification since the planner
+        * will be comparing them to similarly-processed qual clause operands,
+        * and may fail to detect valid matches without this step.  We don't
+        * need to bother with canonicalize_qual() though, because partition
+        * expressions are not full-fledged qualification clauses.
+        */
+       expr = eval_const_expressions(NULL, (Node *) expr);
+
+       /* May as well fix opfuncids too */
+       fix_opfuncids((Node *) expr);
+       key->partexprs = (List *) expr;
+   }
+
+   /* Per-column arrays, one element for each of the partnatts key columns */
+   key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
+   key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+   key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+   key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
+
+   key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+
+   /* Gather type and collation info as well */
+   key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+   key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
+   key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
+   key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
+   key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
+   key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+
+   /* Copy partattrs and fill other per-attribute info */
+   memcpy(key->partattrs, attrs, key->partnatts * sizeof(AttrNumber));
+   partexprs_item = list_head(key->partexprs);
+   for (i = 0; i < key->partnatts; i++)
+   {
+       AttrNumber  attno = key->partattrs[i];
+       HeapTuple   opclasstup;
+       Form_pg_opclass opclassform;
+       Oid         funcid;
+
+       /* Collect opfamily information */
+       opclasstup = SearchSysCache1(CLAOID,
+                                    ObjectIdGetDatum(opclass->values[i]));
+       if (!HeapTupleIsValid(opclasstup))
+           elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
+
+       opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
+       key->partopfamily[i] = opclassform->opcfamily;
+       key->partopcintype[i] = opclassform->opcintype;
+
+       /*
+        * A btree support function covers the cases of list and range methods
+        * currently supported.
+        */
+       funcid = get_opfamily_proc(opclassform->opcfamily,
+                                  opclassform->opcintype,
+                                  opclassform->opcintype,
+                                  BTORDER_PROC);
+       if (!OidIsValid(funcid))    /* should not happen */
+           elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
+                BTORDER_PROC,
+                opclassform->opcintype,
+                opclassform->opcintype,
+                opclassform->opcfamily);
+
+       fmgr_info(funcid, &key->partsupfunc[i]);
+
+       /* Collation */
+       key->partcollation[i] = collation->values[i];
+
+       /* Collect type information */
+       if (attno != 0)
+       {
+           /* Plain column: take type info from the relation's tuple desc */
+           key->parttypid[i] = relation->rd_att->attrs[attno - 1]->atttypid;
+           key->parttypmod[i] = relation->rd_att->attrs[attno - 1]->atttypmod;
+           key->parttypcoll[i] = relation->rd_att->attrs[attno - 1]->attcollation;
+       }
+       else
+       {
+           /* attno 0: take type info from the next unused expression */
+           if (partexprs_item == NULL)
+               elog(ERROR, "wrong number of partition key expressions");
+
+           key->parttypid[i] = exprType(lfirst(partexprs_item));
+           key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
+           key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
+
+           /*
+            * Step to the following expression; without this, every later
+            * expression column would reuse the first expression's type.
+            */
+           partexprs_item = lnext(partexprs_item);
+       }
+       get_typlenbyvalalign(key->parttypid[i],
+                            &key->parttyplen[i],
+                            &key->parttypbyval[i],
+                            &key->parttypalign[i]);
+
+       ReleaseSysCache(opclasstup);
+   }
+
+   ReleaseSysCache(tuple);
+
+   /* Success --- now copy to the cache memory */
+   partkeycxt = AllocSetContextCreate(CacheMemoryContext,
+                                      RelationGetRelationName(relation),
+                                      ALLOCSET_SMALL_SIZES);
+   relation->rd_partkeycxt = partkeycxt;
+   oldcxt = MemoryContextSwitchTo(relation->rd_partkeycxt);
+   relation->rd_partkey = copy_partition_key(key);
+   MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * copy_partition_key
+ *     Make a deep copy of a partition key
+ *
+ * Every per-column array and the expression list of fromkey are duplicated,
+ * so the result shares no storage with fromkey.
+ *
+ * The copy is allocated in the current memory context.
+ */
+static PartitionKey
+copy_partition_key(PartitionKey fromkey)
+{
+   PartitionKey newkey;
+   int         n;
+   int         i;
+
+   newkey = (PartitionKey) palloc(sizeof(PartitionKeyData));
+
+   newkey->strategy = fromkey->strategy;
+   newkey->partnatts = n = fromkey->partnatts;
+
+   newkey->partattrs = (AttrNumber *) palloc(n * sizeof(AttrNumber));
+   memcpy(newkey->partattrs, fromkey->partattrs, n * sizeof(AttrNumber));
+
+   newkey->partexprs = copyObject(fromkey->partexprs);
+
+   newkey->partopfamily = (Oid *) palloc(n * sizeof(Oid));
+   memcpy(newkey->partopfamily, fromkey->partopfamily, n * sizeof(Oid));
+
+   newkey->partopcintype = (Oid *) palloc(n * sizeof(Oid));
+   memcpy(newkey->partopcintype, fromkey->partopcintype, n * sizeof(Oid));
+
+   /*
+    * Use fmgr_info_copy() rather than a bitwise copy, so that each copied
+    * FmgrInfo is bound to the memory context holding the copy instead of
+    * the context the source struct was set up in.
+    */
+   newkey->partsupfunc = (FmgrInfo *) palloc(n * sizeof(FmgrInfo));
+   for (i = 0; i < n; i++)
+       fmgr_info_copy(&newkey->partsupfunc[i], &fromkey->partsupfunc[i],
+                      CurrentMemoryContext);
+
+   newkey->partcollation = (Oid *) palloc(n * sizeof(Oid));
+   memcpy(newkey->partcollation, fromkey->partcollation, n * sizeof(Oid));
+
+   newkey->parttypid = (Oid *) palloc(n * sizeof(Oid));
+   memcpy(newkey->parttypid, fromkey->parttypid, n * sizeof(Oid));
+
+   newkey->parttypmod = (int32 *) palloc(n * sizeof(int32));
+   memcpy(newkey->parttypmod, fromkey->parttypmod, n * sizeof(int32));
+
+   newkey->parttyplen = (int16 *) palloc(n * sizeof(int16));
+   memcpy(newkey->parttyplen, fromkey->parttyplen, n * sizeof(int16));
+
+   newkey->parttypbyval = (bool *) palloc(n * sizeof(bool));
+   memcpy(newkey->parttypbyval, fromkey->parttypbyval, n * sizeof(bool));
+
+   /* parttypalign is a char array; size it by its own element type */
+   newkey->parttypalign = (char *) palloc(n * sizeof(char));
+   memcpy(newkey->parttypalign, fromkey->parttypalign, n * sizeof(char));
+
+   newkey->parttypcoll = (Oid *) palloc(n * sizeof(Oid));
+   memcpy(newkey->parttypcoll, fromkey->parttypcoll, n * sizeof(Oid));
+
+   return newkey;
+}
+
 /*
  *     equalRuleLocks
  *
@@ -922,6 +1161,58 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
    return true;
 }
 
+/*
+ * equalPartitionDescs
+ *     Report whether two partition descriptors are logically the same
+ *
+ * Two NULL descriptors are equal; a NULL and a non-NULL one are not.
+ * Otherwise the descriptors must have the same number of partitions, the
+ * same OIDs position by position, and bound collections that are either
+ * both absent or equal according to partition_bounds_equal().
+ */
+static bool
+equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
+                   PartitionDesc partdesc2)
+{
+   int         idx;
+
+   /* A missing descriptor can only match another missing descriptor */
+   if (partdesc1 == NULL)
+       return (partdesc2 == NULL);
+   if (partdesc2 == NULL)
+       return false;
+
+   if (partdesc1->nparts != partdesc2->nparts)
+       return false;
+
+   Assert(key != NULL || partdesc1->nparts == 0);
+
+   /*
+    * As long as no partition was added to or removed from the relation,
+    * the two OID arrays are expected to agree element by element.
+    */
+   for (idx = 0; idx < partdesc1->nparts; idx++)
+   {
+       if (partdesc1->oids[idx] != partdesc2->oids[idx])
+           return false;
+   }
+
+   /*
+    * Finally the bound collections.  Walking them is partition.c's
+    * business, so defer to it once we know both sides have one.
+    */
+   if (partdesc1->boundinfo == NULL)
+       return (partdesc2->boundinfo == NULL);
+   if (partdesc2->boundinfo == NULL)
+       return false;
+
+   if (!partition_bounds_equal(key, partdesc1->boundinfo,
+                               partdesc2->boundinfo))
+       return false;
+
+   return true;
+}
+
 /*
  *     RelationBuildDesc
  *
@@ -1050,6 +1341,20 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
    relation->rd_fkeylist = NIL;
    relation->rd_fkeyvalid = false;
 
+   /* if a partitioned table, initialize key and partition descriptor info */
+   if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+   {
+       RelationBuildPartitionKey(relation);
+       RelationBuildPartitionDesc(relation);
+   }
+   else
+   {
+       relation->rd_partkeycxt = NULL;
+       relation->rd_partkey = NULL;
+       relation->rd_partdesc = NULL;
+       relation->rd_pdcxt = NULL;
+   }
+
    /*
     * if it's an index, initialize index-related information
     */
@@ -2042,6 +2347,12 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
        MemoryContextDelete(relation->rd_rulescxt);
    if (relation->rd_rsdesc)
        MemoryContextDelete(relation->rd_rsdesc->rscxt);
+   if (relation->rd_partkeycxt)
+       MemoryContextDelete(relation->rd_partkeycxt);
+   if (relation->rd_pdcxt)
+       MemoryContextDelete(relation->rd_pdcxt);
+   if (relation->rd_partcheck)
+       pfree(relation->rd_partcheck);
    if (relation->rd_fdwroutine)
        pfree(relation->rd_fdwroutine);
    pfree(relation);
@@ -2190,11 +2501,12 @@ RelationClearRelation(Relation relation, bool rebuild)
         *
         * When rebuilding an open relcache entry, we must preserve ref count,
         * rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state.  Also
-        * attempt to preserve the pg_class entry (rd_rel), tupledesc, and
-        * rewrite-rule substructures in place, because various places assume
-        * that these structures won't move while they are working with an
-        * open relcache entry.  (Note: the refcount mechanism for tupledescs
-        * might someday allow us to remove this hack for the tupledesc.)
+        * attempt to preserve the pg_class entry (rd_rel), tupledesc,
+        * rewrite-rule, partition key, and partition descriptor substructures
+        * in place, because various places assume that these structures won't
+        * move while they are working with an open relcache entry.  (Note:
+        * the refcount mechanism for tupledescs might someday allow us to
+        * remove this hack for the tupledesc.)
         *
         * Note that this process does not touch CurrentResourceOwner; which
         * is good because whatever ref counts the entry may have do not
@@ -2205,6 +2517,8 @@ RelationClearRelation(Relation relation, bool rebuild)
        bool        keep_tupdesc;
        bool        keep_rules;
        bool        keep_policies;
+       bool        keep_partkey;
+       bool        keep_partdesc;
 
        /* Build temporary entry, but don't link it into hashtable */
        newrel = RelationBuildDesc(save_relid, false);
@@ -2235,6 +2549,10 @@ RelationClearRelation(Relation relation, bool rebuild)
        keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
        keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
        keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
+       keep_partkey = (relation->rd_partkey != NULL);
+       keep_partdesc = equalPartitionDescs(relation->rd_partkey,
+                                           relation->rd_partdesc,
+                                           newrel->rd_partdesc);
 
        /*
         * Perform swapping of the relcache entry contents.  Within this
@@ -2289,6 +2607,18 @@ RelationClearRelation(Relation relation, bool rebuild)
        SWAPFIELD(Oid, rd_toastoid);
        /* pgstat_info must be preserved */
        SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
+       /* partition key must be preserved, if we have one */
+       if (keep_partkey)
+       {
+           SWAPFIELD(PartitionKey, rd_partkey);
+           SWAPFIELD(MemoryContext, rd_partkeycxt);
+       }
+       /* preserve old partdesc if no logical change */
+       if (keep_partdesc)
+       {
+           SWAPFIELD(PartitionDesc, rd_partdesc);
+           SWAPFIELD(MemoryContext, rd_pdcxt);
+       }
 
 #undef SWAPFIELD
 
@@ -2983,7 +3313,9 @@ RelationBuildLocalRelation(const char *relname,
 
    /* system relations and non-table objects don't have one */
    if (!IsSystemNamespace(relnamespace) &&
-       (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW))
+       (relkind == RELKIND_RELATION ||
+        relkind == RELKIND_MATVIEW ||
+        relkind == RELKIND_PARTITIONED_TABLE))
        rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
    else
        rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
@@ -3514,6 +3846,20 @@ RelationCacheInitializePhase3(void)
            restart = true;
        }
 
+       /*
+        * Reload partition key and descriptor for a partitioned table.
+        */
+       if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+       {
+           RelationBuildPartitionKey(relation);
+           Assert(relation->rd_partkey != NULL);
+
+           RelationBuildPartitionDesc(relation);
+           Assert(relation->rd_partdesc != NULL);
+
+           restart = true;
+       }
+
        /* Release hold on the relation */
        RelationDecrementReferenceCount(relation);
 
@@ -4267,6 +4613,8 @@ RelationGetIndexExpressions(Relation relation)
     */
    result = (List *) eval_const_expressions(NULL, (Node *) result);
 
+   result = (List *) canonicalize_qual((Expr *) result);
+
    /* May as well fix opfuncids too */
    fix_opfuncids((Node *) result);
 
@@ -5035,6 +5383,10 @@ load_relcache_init_file(bool shared)
        rel->rd_rulescxt = NULL;
        rel->trigdesc = NULL;
        rel->rd_rsdesc = NULL;
+       rel->rd_partkeycxt = NULL;
+       rel->rd_partkey = NULL;
+       rel->rd_partdesc = NULL;
+       rel->rd_partcheck = NIL;
        rel->rd_indexprs = NIL;
        rel->rd_indpred = NIL;
        rel->rd_exclops = NULL;
index 65ffe844093ac3f1da977427da8d9621f451222b..a3e0517b9408dc4421f412606ec9c125e090407f 100644 (file)
@@ -48,6 +48,7 @@
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_opfamily.h"
+#include "catalog/pg_partitioned_table.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_range.h"
 #include "catalog/pg_rewrite.h"
@@ -568,6 +569,17 @@ static const struct cachedesc cacheinfo[] = {
        },
        8
    },
+   {PartitionedRelationId,     /* PARTRELID */
+       PartitionedRelidIndexId,
+       1,
+       {
+           Anum_pg_partitioned_table_partrelid,
+           0,
+           0,
+           0
+       },
+       32
+   },
    {ProcedureRelationId,       /* PROCNAMEARGSNSP */
        ProcedureNameArgsNspIndexId,
        3,
index 1cbb9874f321b9adb09fd3d28c4cbbc3ffd8648b..22f1806eca83360628a606a37e6dbfaf660fbbc9 100644 (file)
@@ -68,6 +68,8 @@ static int    numextmembers;
 
 static void flagInhTables(TableInfo *tbinfo, int numTables,
              InhInfo *inhinfo, int numInherits);
+static void flagPartitions(TableInfo *tblinfo, int numTables,
+             PartInfo *partinfo, int numPartitions);
 static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables);
 static DumpableObject **buildIndexArray(void *objArray, int numObjs,
                Size objSize);
@@ -75,6 +77,8 @@ static int    DOCatalogIdCompare(const void *p1, const void *p2);
 static int ExtensionMemberIdCompare(const void *p1, const void *p2);
 static void findParentsByOid(TableInfo *self,
                 InhInfo *inhinfo, int numInherits);
+static void findPartitionParentByOid(TableInfo *self, PartInfo *partinfo,
+                int numPartitions);
 static int strInArray(const char *pattern, char **arr, int arr_size);
 
 
@@ -93,8 +97,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
    NamespaceInfo *nspinfo;
    ExtensionInfo *extinfo;
    InhInfo    *inhinfo;
+   PartInfo    *partinfo;
    int         numAggregates;
    int         numInherits;
+   int         numPartitions;
    int         numRules;
    int         numProcLangs;
    int         numCasts;
@@ -231,6 +237,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
        write_msg(NULL, "reading table inheritance information\n");
    inhinfo = getInherits(fout, &numInherits);
 
+   if (g_verbose)
+       write_msg(NULL, "reading partition information\n");
+   partinfo = getPartitions(fout, &numPartitions);
+
    if (g_verbose)
        write_msg(NULL, "reading event triggers\n");
    getEventTriggers(fout, &numEventTriggers);
@@ -245,6 +255,11 @@ getSchemaData(Archive *fout, int *numTablesPtr)
        write_msg(NULL, "finding inheritance relationships\n");
    flagInhTables(tblinfo, numTables, inhinfo, numInherits);
 
+   /* Link tables to partition parents, mark parents as interesting */
+   if (g_verbose)
+       write_msg(NULL, "finding partition relationships\n");
+   flagPartitions(tblinfo, numTables, partinfo, numPartitions);
+
    if (g_verbose)
        write_msg(NULL, "reading column info for interesting tables\n");
    getTableAttrs(fout, tblinfo, numTables);
@@ -273,6 +288,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
        write_msg(NULL, "reading policies\n");
    getPolicies(fout, tblinfo, numTables);
 
+   if (g_verbose)
+       write_msg(NULL, "reading partition key information for interesting tables\n");
+   getTablePartitionKeyInfo(fout, tblinfo, numTables);
+
    *numTablesPtr = numTables;
    return tblinfo;
 }
@@ -319,6 +338,43 @@ flagInhTables(TableInfo *tblinfo, int numTables,
    }
 }
 
+/* flagPartitions -
+ *  For every target table that is a partition, record its parent table,
+ *  flag that parent as interesting, and make the partition depend on it
+ *
+ * modifies tblinfo
+ */
+static void
+flagPartitions(TableInfo *tblinfo, int numTables,
+             PartInfo *partinfo, int numPartitions)
+{
+   int     t;
+
+   for (t = 0; t < numTables; t++)
+   {
+       TableInfo  *tbl = &tblinfo[t];
+
+       /* Sequences, views and matviews are never partitions */
+       switch (tbl->relkind)
+       {
+           case RELKIND_SEQUENCE:
+           case RELKIND_VIEW:
+           case RELKIND_MATVIEW:
+               continue;
+           default:
+               break;
+       }
+
+       /* Tables we are not dumping need no parent lookup either */
+       if (!tbl->dobj.dump)
+           continue;
+
+       /* Look up the parent and store it in the table's partitionOf */
+       findPartitionParentByOid(tbl, partinfo, numPartitions);
+       if (!tbl->partitionOf)
+           continue;
+
+       /* getTableAttrs only bothers with tables flagged as interesting */
+       tbl->partitionOf->interesting = true;
+       addObjectDependency(&tbl->dobj, tbl->partitionOf->dobj.dumpId);
+   }
+}
+
 /* flagInhAttrs -
  *  for each dumpable table in tblinfo, flag its inherited attributes
  *
@@ -919,6 +975,40 @@ findParentsByOid(TableInfo *self,
        self->parents = NULL;
 }
 
+/*
+ * findPartitionParentByOid
+ *   locate the parent table of a partition
+ *
+ * Scans partinfo[] for entries whose partrelid is self's OID.  For each
+ * such entry, the parent TableInfo is looked up by OID and stored in
+ * self->partitionOf, together with the entry's partdef in
+ * self->partitiondef.  A parent OID that cannot be resolved is fatal.
+ * When no entry matches, self is left untouched.
+ */
+static void
+findPartitionParentByOid(TableInfo *self, PartInfo *partinfo,
+                        int numPartitions)
+{
+   Oid         selfOid = self->dobj.catId.oid;
+   int         p;
+
+   for (p = 0; p < numPartitions; p++)
+   {
+       PartInfo   *entry = &partinfo[p];
+       TableInfo  *parentTbl;
+
+       if (entry->partrelid != selfOid)
+           continue;
+
+       parentTbl = findTableByOid(entry->partparent);
+       if (parentTbl == NULL)
+       {
+           write_msg(NULL, "failed sanity check, parent OID %u of table \"%s\" (OID %u) not found\n",
+                     entry->partparent,
+                     self->dobj.name,
+                     selfOid);
+           exit_nicely(1);
+       }
+
+       /* Remember the parent, and its partition definition as well */
+       self->partitionOf = parentTbl;
+       self->partitiondef = entry->partdef;
+   }
+}
+
 /*
  * parseOidArray
  *   parse a string of numbers delimited by spaces into a character array
index 42873bb32ac918539c04afd0f89179991e330df8..b43d152e77e580f6bee0cf380a31fcba34f54ed8 100644 (file)
@@ -1239,9 +1239,10 @@ expand_table_name_patterns(Archive *fout,
                          "SELECT c.oid"
                          "\nFROM pg_catalog.pg_class c"
        "\n     LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace"
-                    "\nWHERE c.relkind in ('%c', '%c', '%c', '%c', '%c')\n",
+                    "\nWHERE c.relkind in ('%c', '%c', '%c', '%c', '%c', '%c')\n",
                          RELKIND_RELATION, RELKIND_SEQUENCE, RELKIND_VIEW,
-                         RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE);
+                         RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE,
+                         RELKIND_PARTITIONED_TABLE);
        processSQLNamePattern(GetConnection(fout), query, cell->val, true,
                              false, "n.nspname", "c.relname", NULL,
                              "pg_catalog.pg_table_is_visible(c.oid)");
@@ -2098,6 +2099,9 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
    /* Skip FOREIGN TABLEs (no data to dump) */
    if (tbinfo->relkind == RELKIND_FOREIGN_TABLE)
        return;
+   /* Skip partitioned tables (data in partitions) */
+   if (tbinfo->relkind == RELKIND_PARTITIONED_TABLE)
+       return;
 
    /* Don't dump data in unlogged tables, if so requested */
    if (tbinfo->relpersistence == RELPERSISTENCE_UNLOGGED &&
@@ -4993,7 +4997,7 @@ getTables(Archive *fout, int *numTables)
                          "(c.oid = pip.objoid "
                          "AND pip.classoid = 'pg_class'::regclass "
                          "AND pip.objsubid = 0) "
-                  "WHERE c.relkind in ('%c', '%c', '%c', '%c', '%c', '%c') "
+                  "WHERE c.relkind in ('%c', '%c', '%c', '%c', '%c', '%c', '%c') "
                          "ORDER BY c.oid",
                          acl_subquery->data,
                          racl_subquery->data,
@@ -5007,7 +5011,8 @@ getTables(Archive *fout, int *numTables)
                          RELKIND_SEQUENCE,
                          RELKIND_RELATION, RELKIND_SEQUENCE,
                          RELKIND_VIEW, RELKIND_COMPOSITE_TYPE,
-                         RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE);
+                         RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE,
+                         RELKIND_PARTITIONED_TABLE);
 
        destroyPQExpBuffer(acl_subquery);
        destroyPQExpBuffer(racl_subquery);
@@ -5535,7 +5540,9 @@ getTables(Archive *fout, int *numTables)
         * We only need to lock the table for certain components; see
         * pg_dump.h
         */
-       if (tblinfo[i].dobj.dump && tblinfo[i].relkind == RELKIND_RELATION &&
+       if (tblinfo[i].dobj.dump &&
+           (tblinfo[i].relkind == RELKIND_RELATION ||
+            tblinfo[i].relkind == RELKIND_PARTITIONED_TABLE) &&
            (tblinfo[i].dobj.dump & DUMP_COMPONENTS_REQUIRING_LOCK))
        {
            resetPQExpBuffer(query);
@@ -5635,9 +5642,16 @@ getInherits(Archive *fout, int *numInherits)
    /* Make sure we are in proper schema */
    selectSourceSchema(fout, "pg_catalog");
 
-   /* find all the inheritance information */
-
-   appendPQExpBufferStr(query, "SELECT inhrelid, inhparent FROM pg_inherits");
+   /*
+    * Find all the inheritance information, excluding implicit inheritance
+    * via partitioning.  We handle that case using getPartitions(), because
+    * we want more information about partitions than just the parent-child
+    * relationship.
+    */
+   appendPQExpBufferStr(query,
+                        "SELECT inhrelid, inhparent "
+                        "FROM pg_inherits "
+                        "WHERE inhparent NOT IN (SELECT oid FROM pg_class WHERE relkind = 'P')");
 
    res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
 
@@ -5663,6 +5677,70 @@ getInherits(Archive *fout, int *numInherits)
    return inhinfo;
 }
 
+/*
+ * getPartitions
+ *   read all the partition inheritance and partition bound information
+ *   from the system catalogs and return them in the PartInfo* structure
+ *
+ * numPartitions is set to the number of pairs read in
+ */
+PartInfo *
+getPartitions(Archive *fout, int *numPartitions)
+{
+   PGresult   *res;
+   int         ntups;
+   int         i;
+   PQExpBuffer query = createPQExpBuffer();
+   PartInfo    *partinfo;