<entry>template data for procedural languages</entry>
</row>
+ <row>
+ <entry><link linkend="catalog-pg-partitioned-table"><structname>pg_partitioned_table</structname></link></entry>
+ <entry>information about partition key of tables</entry>
+ </row>
+
<row>
<entry><link linkend="catalog-pg-policy"><structname>pg_policy</structname></link></entry>
<entry>row-security policies</entry>
<entry><type>char</type></entry>
<entry></entry>
<entry>
- <literal>r</> = ordinary table, <literal>i</> = index,
+ <literal>r</> = ordinary table, <literal>P</> = partitioned table,
+ <literal>i</> = index,
<literal>S</> = sequence, <literal>v</> = view,
<literal>m</> = materialized view,
<literal>c</> = composite type, <literal>t</> = TOAST table,
</entry>
</row>
+ <row>
+ <entry><structfield>relispartition</structfield></entry>
+ <entry><type>bool</type></entry>
+ <entry></entry>
+ <entry>True if table is a partition</entry>
+ </row>
+
<row>
<entry><structfield>relfrozenxid</structfield></entry>
<entry><type>xid</type></entry>
Access-method-specific options, as <quote>keyword=value</> strings
</entry>
</row>
+
+ <row>
+ <entry><structfield>relpartbound</structfield></entry>
+ <entry><type>pg_node_tree</type></entry>
+ <entry></entry>
+ <entry>
+ If table is a partition (see <structfield>relispartition</structfield>),
+ internal representation of the partition bound
+ </entry>
+ </row>
</tbody>
</tgroup>
</table>
</sect1>
+ <sect1 id="catalog-pg-partitioned-table">
+ <title><structname>pg_partitioned_table</structname></title>
+
+ <indexterm zone="catalog-pg-partitioned-table">
+ <primary>pg_partitioned_table</primary>
+ </indexterm>
+
+ <para>
+ The catalog <structname>pg_partitioned_table</structname> stores
+ information about how tables are partitioned.
+ </para>
+
+ <table>
+ <title><structname>pg_partitioned_table</> Columns</title>
+
+ <tgroup cols="4">
+ <thead>
+ <row>
+ <entry>Name</entry>
+ <entry>Type</entry>
+ <entry>References</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+
+ <tbody>
+
+ <row>
+ <entry><structfield>partrelid</structfield></entry>
+ <entry><type>oid</type></entry>
+ <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
+ <entry>The OID of the <structname>pg_class</> entry for this partitioned table</entry>
+ </row>
+
+ <row>
+ <entry><structfield>partstrat</structfield></entry>
+ <entry><type>char</type></entry>
+ <entry></entry>
+ <entry>
+ Partitioning strategy; <literal>l</> = list partitioned table,
+ <literal>r</> = range partitioned table
+ </entry>
+ </row>
+
+ <row>
+ <entry><structfield>partnatts</structfield></entry>
+ <entry><type>int2</type></entry>
+ <entry></entry>
+ <entry>The number of columns in partition key</entry>
+ </row>
+
+ <row>
+ <entry><structfield>partattrs</structfield></entry>
+ <entry><type>int2vector</type></entry>
+ <entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
+ <entry>
+ This is an array of <structfield>partnatts</structfield> values that
+ indicate which table columns are part of the partition key. For
+ example, a value of <literal>1 3</literal> would mean that the first
+ and the third table columns make up the partition key. A zero in this
+ array indicates that the corresponding partition key column is an
+ expression, rather than a simple column reference.
+ </entry>
+ </row>
+
+ <row>
+ <entry><structfield>partclass</structfield></entry>
+ <entry><type>oidvector</type></entry>
+ <entry><literal><link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link>.oid</literal></entry>
+ <entry>
+ For each column in the partition key, this contains the OID of the
+ operator class to use. See
+ <link linkend="catalog-pg-opclass"><structname>pg_opclass</structname></link> for details.
+ </entry>
+ </row>
+
+ <row>
+ <entry><structfield>partcollation</structfield></entry>
+ <entry><type>oidvector</type></entry>
+ <entry><literal><link linkend="catalog-pg-collation"><structname>pg_collation</structname></link>.oid</literal></entry>
+ <entry>
+ For each column in the partition key, this contains the OID of the
+ collation to use for partitioning.
+ </entry>
+ </row>
+
+ <row>
+ <entry><structfield>partexprs</structfield></entry>
+ <entry><type>pg_node_tree</type></entry>
+ <entry></entry>
+ <entry>
+ Expression trees (in <function>nodeToString()</function>
+ representation) for partition key columns that are not simple column
+ references. This is a list with one element for each zero
+ entry in <structfield>partattrs</>. Null if all partition key columns
+ are simple references.
+ </entry>
+ </row>
+
+ </tbody>
+ </tgroup>
+ </table>
+ </sect1>
+
<sect1 id="catalog-pg-policy">
<title><structname>pg_policy</structname></title>
SET SCHEMA <replaceable class="PARAMETER">new_schema</replaceable>
ALTER TABLE ALL IN TABLESPACE <replaceable class="PARAMETER">name</replaceable> [ OWNED BY <replaceable class="PARAMETER">role_name</replaceable> [, ... ] ]
SET TABLESPACE <replaceable class="PARAMETER">new_tablespace</replaceable> [ NOWAIT ]
+ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
+ ATTACH PARTITION <replaceable class="PARAMETER">partition_name</replaceable> FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable>
+ALTER TABLE [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable>
+ DETACH PARTITION <replaceable class="PARAMETER">partition_name</replaceable>
<phrase>where <replaceable class="PARAMETER">action</replaceable> is one of:</phrase>
values or to reject null values. You can only use <literal>SET
NOT NULL</> when the column contains no null values.
</para>
+
+ <para>
+ If this table is a partition, one cannot perform <literal>DROP NOT NULL</>
+ on a column if it is marked <literal>NOT NULL</literal> in the parent
+ table.
+ </para>
</listitem>
</varlistentry>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>ATTACH PARTITION</literal> <replaceable class="PARAMETER">partition_name</replaceable> FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable></term>
+ <listitem>
+ <para>
+ This form attaches an existing table (which might itself be partitioned)
+ as a partition of the target table using the same syntax for
+ <replaceable class="PARAMETER">partition_bound_spec</replaceable> as
+ <xref linkend="sql-createtable">. The partition bound specification
+ must correspond to the partitioning strategy and partition key of the
+ target table. The table to be attached must have all the same columns
+ as the target table and no more; moreover, the column types must also
+ match. Also, it must have all the <literal>NOT NULL</literal> and
+ <literal>CHECK</literal> constraints of the target table. Currently
+ <literal>UNIQUE</literal>, <literal>PRIMARY KEY</literal>, and
+ <literal>FOREIGN KEY</literal> constraints are not considered.
+ If any of the <literal>CHECK</literal> constraints of the table being
+ attached is marked <literal>NO INHERIT</literal>, the command will fail;
+ such a constraint must be recreated without the <literal>NO INHERIT</literal>
+ clause.
+ </para>
+
+ <para>
+ A full table scan is performed on the table being attached to check that
+ no existing row in the table violates the partition constraint. It is
+ possible to avoid this scan by adding a valid <literal>CHECK</literal>
+ constraint to the table that would allow only the rows satisfying the
+ desired partition constraint before running this command. It will be
+ determined using such a constraint that the table need not be scanned
+ to validate the partition constraint. This does not work, however, if
+ any of the partition keys is an expression and the partition does not
+ accept <literal>NULL</literal> values. If attaching a list partition
+ that will not accept <literal>NULL</literal> values, also add
+ <literal>NOT NULL</literal> constraint to the partition key column,
+ unless it's an expression.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>DETACH PARTITION</literal> <replaceable class="PARAMETER">partition_name</replaceable></term>
+ <listitem>
+ <para>
+ This form detaches the specified partition of the target table. The detached
+ partition continues to exist as a standalone table, but no longer has any
+ ties to the table from which it was detached.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</para>
<para>
All the actions except <literal>RENAME</literal>,
- <literal>SET TABLESPACE</literal> and <literal>SET SCHEMA</literal>
- can be combined into
+ <literal>SET TABLESPACE</literal>, <literal>SET SCHEMA</literal>,
+ <literal>ATTACH PARTITION</literal>, and
+ <literal>DETACH PARTITION</literal> can be combined into
a list of multiple alterations to apply in parallel. For example, it
is possible to add several columns and/or alter the type of several
columns in a single command. This is particularly useful with large
You must own the table to use <command>ALTER TABLE</>.
To change the schema or tablespace of a table, you must also have
<literal>CREATE</literal> privilege on the new schema or tablespace.
- To add the table as a new child of a parent table, you must own the
- parent table as well.
+ To add the table as a new child of a parent table, you must own the parent
+ table as well. Also, to attach a table as a new partition of the table,
+ you must own the table being attached.
To alter the owner, you must also be a direct or indirect member of the new
owning role, and that role must have <literal>CREATE</literal> privilege on
the table's schema. (These restrictions enforce that altering the owner
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><replaceable class="PARAMETER">partition_name</replaceable></term>
+ <listitem>
+ <para>
+ The name of the table to attach as a new partition or to detach from this table.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">partition_bound_spec</replaceable></term>
+ <listitem>
+ <para>
+ The partition bound specification for a new partition. Refer to
+ <xref linkend="sql-createtable"> for more details on the syntax of the same.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</refsect1>
but does not require a table rewrite.
</para>
+ <para>
+ Similarly, when attaching a new partition it may be scanned to verify that
+ existing rows meet the partition constraint.
+ </para>
+
<para>
The main reason for providing the option to specify multiple changes
in a single <command>ALTER TABLE</> is that multiple table scans or
COLUMN</literal> (i.e., <command>ALTER TABLE ONLY ... DROP
COLUMN</command>) never removes any descendant columns, but
instead marks them as independently defined rather than inherited.
+ A nonrecursive <literal>DROP COLUMN</literal> command will fail for a
+ partitioned table, because all partitions of a table must have the same
+ columns as the partitioning root.
</para>
<para>
ADD CONSTRAINT distributors_pkey PRIMARY KEY USING INDEX dist_id_temp_idx;
</programlisting></para>
+ <para>
+ Attach a partition to a range partitioned table:
+<programlisting>
+ALTER TABLE measurement
+ ATTACH PARTITION measurement_y2016m07 FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
+</programlisting></para>
+
+ <para>
+ Attach a partition to a list partitioned table:
+<programlisting>
+ALTER TABLE cities
+ ATTACH PARTITION cities_west FOR VALUES IN ('Los Angeles', 'San Francisco');
+</programlisting></para>
+
+ <para>
+ Detach a partition from a partitioned table:
+<programlisting>
+ALTER TABLE measurement
+    DETACH PARTITION measurement_y2015m12;
+</programlisting></para>
+
</refsect1>
<refsect1>
SERVER <replaceable class="parameter">server_name</replaceable>
[ OPTIONS ( <replaceable class="PARAMETER">option</replaceable> '<replaceable class="PARAMETER">value</replaceable>' [, ... ] ) ]
+CREATE FOREIGN TABLE [ IF NOT EXISTS ] <replaceable class="PARAMETER">table_name</replaceable>
+ PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable> [ (
+ { <replaceable class="PARAMETER">column_name</replaceable> WITH OPTIONS [ <replaceable class="PARAMETER">column_constraint</replaceable> [ ... ] ]
+ | <replaceable>table_constraint</replaceable> }
+ [, ... ]
+) ] FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable>
+ SERVER <replaceable class="parameter">server_name</replaceable>
+[ OPTIONS ( <replaceable class="PARAMETER">option</replaceable> '<replaceable class="PARAMETER">value</replaceable>' [, ... ] ) ]
+
<phrase>where <replaceable class="PARAMETER">column_constraint</replaceable> is:</phrase>
[ CONSTRAINT <replaceable class="PARAMETER">constraint_name</replaceable> ]
name as any existing data type in the same schema.
</para>
+ <para>
+ If the <literal>PARTITION OF</literal> clause is specified, the table is
+ created as a partition of <literal>parent_table</literal> with the specified
+ bounds.
+ </para>
+
<para>
To be able to create a foreign table, you must have <literal>USAGE</literal>
privilege on the foreign server, as well as <literal>USAGE</literal>
SERVER film_server;
</programlisting></para>
+ <para>
+ Create foreign table <structname>measurement_y2016m07</>, which will be
+ accessed through the server <structname>server_07</>, as a partition
+ of the range partitioned table <structname>measurement</>:
+
+<programlisting>
+CREATE FOREIGN TABLE measurement_y2016m07
+ PARTITION OF measurement FOR VALUES FROM ('2016-07-01') TO ('2016-08-01')
+ SERVER server_07;
+</programlisting></para>
+
</refsect1>
<refsect1 id="SQL-CREATEFOREIGNTABLE-compatibility">
[, ... ]
] )
[ INHERITS ( <replaceable>parent_table</replaceable> [, ... ] ) ]
+[ PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ... ] ) ]
[ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
[ TABLESPACE <replaceable class="PARAMETER">tablespace_name</replaceable> ]
| <replaceable>table_constraint</replaceable> }
[, ... ]
) ]
+[ PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ... ] ) ]
+[ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
+[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
+[ TABLESPACE <replaceable class="PARAMETER">tablespace_name</replaceable> ]
+
+CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] <replaceable class="PARAMETER">table_name</replaceable>
+ PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable> [ (
+ { <replaceable class="PARAMETER">column_name</replaceable> [ <replaceable class="PARAMETER">column_constraint</replaceable> [ ... ] ]
+ | <replaceable>table_constraint</replaceable> }
+ [, ... ]
+) ] FOR VALUES <replaceable class="PARAMETER">partition_bound_spec</replaceable>
+[ PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ COLLATE <replaceable class="parameter">collation</replaceable> ] [ <replaceable class="parameter">opclass</replaceable> ] [, ... ] ) ]
[ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
[ TABLESPACE <replaceable class="PARAMETER">tablespace_name</replaceable> ]
{ INCLUDING | EXCLUDING } { DEFAULTS | CONSTRAINTS | INDEXES | STORAGE | COMMENTS | ALL }
+<phrase>and <replaceable class="PARAMETER">partition_bound_spec</replaceable> is:</phrase>
+
+{ IN ( <replaceable class="PARAMETER">expression</replaceable> [, ...] ) |
+ FROM ( { <replaceable class="PARAMETER">expression</replaceable> | UNBOUNDED } [, ...] ) TO ( { <replaceable class="PARAMETER">expression</replaceable> | UNBOUNDED } [, ...] ) }
+
<phrase><replaceable class="PARAMETER">index_parameters</replaceable> in <literal>UNIQUE</literal>, <literal>PRIMARY KEY</literal>, and <literal>EXCLUDE</literal> constraints are:</phrase>
[ WITH ( <replaceable class="PARAMETER">storage_parameter</replaceable> [= <replaceable class="PARAMETER">value</replaceable>] [, ... ] ) ]
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>PARTITION OF <replaceable class="PARAMETER">parent_table</replaceable></literal></term>
+ <listitem>
+ <para>
+ Creates the table as a <firstterm>partition</firstterm> of the specified
+ parent table.
+ </para>
+
+ <para>
+ The partition bound specification must correspond to the partitioning
+ method and partition key of the parent table, and must not overlap with
+ any existing partition of that parent.
+ </para>
+
+ <para>
+ A partition cannot have columns other than those inherited from the
+ parent. That includes the <structfield>oid</> column, which can be
+ specified using the <literal>WITH (OIDS)</literal> clause.
+ Defaults and constraints can optionally be specified for each of the
+ inherited columns. One can also specify table constraints in addition
+ to those inherited from the parent. If a check constraint with the name
+ matching one of the parent's constraints is specified, it is merged with
+ the latter, provided the specified condition is the same.
+ </para>
+
+ <para>
+ Rows inserted into a partitioned table will be automatically routed to
+ the correct partition. If no suitable partition exists, an error will
+ occur.
+ </para>
+
+ <para>
+ A partition must have the same column names and types as the table of
+ which it is a partition. Therefore, modifications to the column names
+ or types of the partitioned table will automatically propagate to all
+ children, as will operations such as TRUNCATE which normally affect a
+ table and all of its inheritance children. It is also possible to
+ TRUNCATE a partition individually, just as for an inheritance child.
+ Note that dropping a partition with <literal>DROP TABLE</literal>
+ requires taking an <literal>ACCESS EXCLUSIVE</literal> lock on the
+ parent table.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><replaceable class="PARAMETER">column_name</replaceable></term>
<listitem>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>PARTITION BY { RANGE | LIST } ( { <replaceable class="parameter">column_name</replaceable> | ( <replaceable class="parameter">expression</replaceable> ) } [ <replaceable class="parameter">opclass</replaceable> ] [, ...] ) </literal></term>
+ <listitem>
+ <para>
+ The optional <literal>PARTITION BY</literal> clause specifies a strategy
+ of partitioning the table. The table thus created is called a
+ <firstterm>partitioned</firstterm> table. The parenthesized list of
+ columns or expressions forms the <firstterm>partition key</firstterm>
+ for the table. When using range partitioning, the partition key can
+ include multiple columns or expressions, but for list partitioning, the
+ partition key must consist of a single column or expression. If no
+ btree operator class is specified when creating a partitioned table,
+ the default btree operator class for the datatype will be used. If
+ there is none, an error will be reported.
+ </para>
+
+ <para>
+ A partitioned table is divided into sub-tables (called partitions),
+ which are created using separate <literal>CREATE TABLE</> commands.
+ The partitioned table is itself empty. A data row inserted into the
+ table is routed to a partition based on the value of columns or
+ expressions in the partition key. If no existing partition matches
+ the values in the new row, an error will be reported.
+ </para>
+
+ <para>
+ Partitioned tables do not support <literal>UNIQUE</literal>,
+ <literal>PRIMARY KEY</literal>, <literal>EXCLUDE</literal>, or
+ <literal>FOREIGN KEY</literal> constraints; however, you can define
+ these constraints on individual partitions.
+ </para>
+
+ <para>
+ When using range partitioning, a <literal>NOT NULL</literal> constraint
+ is added to each non-expression column in the partition key.
+ </para>
+
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><literal>LIKE <replaceable>source_table</replaceable> [ <replaceable>like_option</replaceable> ... ]</literal></term>
<listitem>
PRIMARY KEY (name),
salary WITH OPTIONS DEFAULT 1000
);
+</programlisting></para>
+
+ <para>
+ Create a range partitioned table:
+<programlisting>
+CREATE TABLE measurement (
+ city_id int not null,
+ logdate date not null,
+ peaktemp int,
+ unitsales int
+) PARTITION BY RANGE (logdate);
+</programlisting></para>
+
+ <para>
+ Create a list partitioned table:
+<programlisting>
+CREATE TABLE cities (
+    city_id      int not null,
+    name         text not null,
+    population   int
+) PARTITION BY LIST (initcap(name));
+</programlisting></para>
+
+ <para>
+ Create partition of a range partitioned table:
+<programlisting>
+CREATE TABLE measurement_y2016m07
+ PARTITION OF measurement (
+ unitsales WITH OPTIONS DEFAULT 0
+) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
+</programlisting></para>
+
+ <para>
+ Create partition of a list partitioned table:
+<programlisting>
+CREATE TABLE cities_west
+ PARTITION OF cities (
+ CONSTRAINT city_id_nonzero CHECK (city_id != 0)
+) FOR VALUES IN ('Los Angeles', 'San Francisco');
+</programlisting></para>
+
+ <para>
+ Create partition of a list partitioned table that is itself further
+ partitioned and then add a partition to it:
+<programlisting>
+CREATE TABLE cities_west
+ PARTITION OF cities (
+ CONSTRAINT city_id_nonzero CHECK (city_id != 0)
+) FOR VALUES IN ('Los Angeles', 'San Francisco') PARTITION BY RANGE (population);
+
+CREATE TABLE cities_west_10000_to_100000
+ PARTITION OF cities_west FOR VALUES FROM (10000) TO (100000);
</programlisting></para>
</refsect1>
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
case RELKIND_MATVIEW:
+ case RELKIND_PARTITIONED_TABLE:
options = heap_reloptions(classForm->relkind, datum, false);
break;
case RELKIND_VIEW:
return (bytea *) rdopts;
case RELKIND_RELATION:
case RELKIND_MATVIEW:
+ case RELKIND_PARTITIONED_TABLE:
return default_reloptions(reloptions, validate, RELOPT_KIND_HEAP);
default:
/* other relkinds are not supported */
include $(top_builddir)/src/Makefile.global
OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \
- objectaccess.o objectaddress.o pg_aggregate.o pg_collation.o \
+ objectaccess.o objectaddress.o partition.o pg_aggregate.o pg_collation.o \
pg_constraint.o pg_conversion.o \
pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \
pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \
pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
pg_foreign_table.h pg_policy.h pg_replication_origin.h \
pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \
- pg_collation.h pg_range.h pg_transform.h \
+ pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \
toasting.h indexing.h \
)
objects = list_concat(objects, objs);
objs = getRelationsInNamespace(namespaceId, RELKIND_FOREIGN_TABLE);
objects = list_concat(objects, objs);
+ objs = getRelationsInNamespace(namespaceId, RELKIND_PARTITIONED_TABLE);
+ objects = list_concat(objects, objs);
break;
case ACL_OBJECT_SEQUENCE:
objs = getRelationsInNamespace(namespaceId, RELKIND_SEQUENCE);
recordDependencyOnSingleRelExpr(const ObjectAddress *depender,
Node *expr, Oid relId,
DependencyType behavior,
- DependencyType self_behavior)
+ DependencyType self_behavior,
+ bool ignore_self)
{
find_expr_references_context context;
RangeTblEntry rte;
context.addrs->numrefs = outrefs;
/* Record the self-dependencies */
- recordMultipleDependencies(depender,
- self_addrs->refs, self_addrs->numrefs,
- self_behavior);
+ if (!ignore_self)
+ recordMultipleDependencies(depender,
+ self_addrs->refs, self_addrs->numrefs,
+ self_behavior);
free_object_addresses(self_addrs);
}
#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
#include "catalog/pg_attrdef.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_foreign_table.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass);
values[Anum_pg_class_relispopulated - 1] = BoolGetDatum(rd_rel->relispopulated);
values[Anum_pg_class_relreplident - 1] = CharGetDatum(rd_rel->relreplident);
+ values[Anum_pg_class_relispartition - 1] = BoolGetDatum(rd_rel->relispartition);
values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid);
values[Anum_pg_class_relminmxid - 1] = MultiXactIdGetDatum(rd_rel->relminmxid);
if (relacl != (Datum) 0)
else
nulls[Anum_pg_class_reloptions - 1] = true;
+ /* relpartbound is set by updating this tuple, if necessary */
+ nulls[Anum_pg_class_relpartbound - 1] = true;
+
tup = heap_form_tuple(RelationGetDescr(pg_class_desc), values, nulls);
/*
new_rel_reltup->reltype = new_type_oid;
new_rel_reltup->reloftype = reloftype;
+ /* relispartition is always set by updating this tuple later */
+ new_rel_reltup->relispartition = false;
+
new_rel_desc->rd_att->tdtypeid = new_type_oid;
/* Now build and insert the tuple */
if (IsBinaryUpgrade &&
(relkind == RELKIND_RELATION || relkind == RELKIND_SEQUENCE ||
relkind == RELKIND_VIEW || relkind == RELKIND_MATVIEW ||
- relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE))
+ relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE ||
+ relkind == RELKIND_PARTITIONED_TABLE))
{
if (!OidIsValid(binary_upgrade_next_heap_pg_class_oid))
ereport(ERROR,
case RELKIND_VIEW:
case RELKIND_MATVIEW:
case RELKIND_FOREIGN_TABLE:
+ case RELKIND_PARTITIONED_TABLE:
relacl = get_user_default_acl(ACL_OBJECT_RELATION, ownerid,
relnamespace);
break;
relkind == RELKIND_VIEW ||
relkind == RELKIND_MATVIEW ||
relkind == RELKIND_FOREIGN_TABLE ||
- relkind == RELKIND_COMPOSITE_TYPE))
+ relkind == RELKIND_COMPOSITE_TYPE ||
+ relkind == RELKIND_PARTITIONED_TABLE))
new_array_oid = AssignTypeArrayOid();
/*
if (relpersistence == RELPERSISTENCE_UNLOGGED)
{
Assert(relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW ||
- relkind == RELKIND_TOASTVALUE);
+ relkind == RELKIND_TOASTVALUE ||
+ relkind == RELKIND_PARTITIONED_TABLE);
+
heap_create_init_fork(new_rel_desc);
}
heap_drop_with_catalog(Oid relid)
{
Relation rel;
+ Oid parentOid;
+ Relation parent = NULL;
/*
* Open and lock the relation.
*/
rel = relation_open(relid, AccessExclusiveLock);
+ /*
+ * If the relation is a partition, we must grab exclusive lock on its
+ * parent because we need to update its partition descriptor. We must
+ * take a table lock strong enough to prevent all queries on the parent
+ * from proceeding until we commit and send out a shared-cache-inval
+ * notice that will make them update their partition descriptor.
+ * Sometimes, doing this is cycles spent uselessly, especially if the
+ * parent will be dropped as part of the same command anyway.
+ */
+ if (rel->rd_rel->relispartition)
+ {
+ parentOid = get_partition_parent(relid);
+ parent = heap_open(parentOid, AccessExclusiveLock);
+ }
+
/*
* There can no longer be anyone *else* touching the relation, but we
* might still have open queries or cursors, or pending trigger events, in
heap_close(rel, RowExclusiveLock);
}
+ /*
+ * If a partitioned table, delete the pg_partitioned_table tuple.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ RemovePartitionKeyByRelId(relid);
+
/*
* Schedule unlinking of the relation's physical files at commit.
*/
* delete relation tuple
*/
DeleteRelationTuple(relid);
+
+ if (parent)
+ {
+ CacheInvalidateRelcache(parent);
+ heap_close(parent, NoLock); /* keep the lock */
+ }
}
else
attNos = NULL;
+ /*
+ * Partitioned tables do not contain any rows themselves, so a NO INHERIT
+ * constraint makes no sense.
+ */
+ if (is_no_inherit &&
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot add NO INHERIT constraint to partitioned table \"%s\"",
+ RelationGetRelationName(rel))));
+
/*
* Create the Check Constraint
*/
* definition) then interpret addition of a local constraint as a
* legal merge. This allows ALTER ADD CONSTRAINT on parent and
* child tables to be given in either order with same end state.
+ * However if the relation is a partition, all inherited
+ * constraints are always non-local, including those that were
+ * merged.
*/
- if (is_local && !con->conislocal)
+ if (is_local && !con->conislocal && !rel->rd_rel->relispartition)
allow_merge = true;
if (!found || !allow_merge)
tup = heap_copytuple(tup);
con = (Form_pg_constraint) GETSTRUCT(tup);
- if (is_local)
- con->conislocal = true;
+ /*
+ * In case of partitions, an inherited constraint must be
+ * inherited only once since it cannot have multiple parents and
+ * it is never considered local.
+ */
+ if (rel->rd_rel->relispartition)
+ {
+ con->coninhcount = 1;
+ con->conislocal = false;
+ }
else
- con->coninhcount++;
+ {
+ if (is_local)
+ con->conislocal = true;
+ else
+ con->coninhcount++;
+ }
+
if (is_no_inherit)
{
Assert(is_local);
lappend_cell_oid(list, prev, datum);
return list;
}
+
+/*
+ * StorePartitionKey
+ *		Store the partition key of partitioned table "rel" in the
+ *		pg_partitioned_table catalog
+ *
+ * strategy is stored as partstrat and partnatts as the number of key
+ * columns.  partattrs, partopclass and partcollation are arrays with
+ * partnatts entries each; they are copied into an int2vector and two
+ * oidvectors respectively.  partexprs, if not NULL, is serialized with
+ * nodeToString() and stored in the partexprs column; otherwise that column
+ * is set to null.
+ *
+ * Besides inserting the catalog row, this records dependencies of the table
+ * on each key column's operator class and collation and on anything
+ * mentioned in the key expressions, and then invalidates rel's relcache
+ * entry.
+ */
+void
+StorePartitionKey(Relation rel,
+				  char strategy,
+				  int16 partnatts,
+				  AttrNumber *partattrs,
+				  List *partexprs,
+				  Oid *partopclass,
+				  Oid *partcollation)
+{
+	int			i;
+	int2vector *partattrs_vec;
+	oidvector  *partopclass_vec;
+	oidvector  *partcollation_vec;
+	Datum		partexprDatum;
+	Relation	pg_partitioned_table;
+	HeapTuple	tuple;
+	Datum		values[Natts_pg_partitioned_table];
+	bool		nulls[Natts_pg_partitioned_table];
+	ObjectAddress myself;
+	ObjectAddress referenced;
+
+	Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+	/* Copy the partition attribute numbers, opclass OIDs into arrays */
+	partattrs_vec = buildint2vector(partattrs, partnatts);
+	partopclass_vec = buildoidvector(partopclass, partnatts);
+	partcollation_vec = buildoidvector(partcollation, partnatts);
+
+	/* Convert the expressions (if any) to a text datum */
+	if (partexprs)
+	{
+		char	   *exprString;
+
+		exprString = nodeToString(partexprs);
+		partexprDatum = CStringGetTextDatum(exprString);
+		pfree(exprString);
+	}
+	else
+		partexprDatum = (Datum) 0;
+
+	pg_partitioned_table = heap_open(PartitionedRelationId, RowExclusiveLock);
+
+	MemSet(nulls, false, sizeof(nulls));
+
+	/* Only this can ever be NULL */
+	if (!partexprDatum)
+		nulls[Anum_pg_partitioned_table_partexprs - 1] = true;
+
+	values[Anum_pg_partitioned_table_partrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel));
+	values[Anum_pg_partitioned_table_partstrat - 1] = CharGetDatum(strategy);
+	values[Anum_pg_partitioned_table_partnatts - 1] = Int16GetDatum(partnatts);
+	values[Anum_pg_partitioned_table_partattrs - 1] = PointerGetDatum(partattrs_vec);
+	values[Anum_pg_partitioned_table_partclass - 1] = PointerGetDatum(partopclass_vec);
+	values[Anum_pg_partitioned_table_partcollation - 1] = PointerGetDatum(partcollation_vec);
+	values[Anum_pg_partitioned_table_partexprs - 1] = partexprDatum;
+
+	tuple = heap_form_tuple(RelationGetDescr(pg_partitioned_table), values, nulls);
+
+	simple_heap_insert(pg_partitioned_table, tuple);
+
+	/* Update the indexes on pg_partitioned_table */
+	CatalogUpdateIndexes(pg_partitioned_table, tuple);
+	heap_close(pg_partitioned_table, RowExclusiveLock);
+
+	/* Mark this relation as dependent on a few things as follows */
+	myself.classId = RelationRelationId;
+	myself.objectId = RelationGetRelid(rel);
+	myself.objectSubId = 0;
+
+	/* Operator class and collation per key column */
+	for (i = 0; i < partnatts; i++)
+	{
+		referenced.classId = OperatorClassRelationId;
+		referenced.objectId = partopclass[i];
+		referenced.objectSubId = 0;
+
+		recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+		/*
+		 * A key column may have no collation at all (InvalidOid); in that
+		 * case there is no object to depend on, so don't record a
+		 * dependency on OID zero.
+		 */
+		if (OidIsValid(partcollation[i]))
+		{
+			referenced.classId = CollationRelationId;
+			referenced.objectId = partcollation[i];
+			referenced.objectSubId = 0;
+
+			recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+		}
+	}
+
+	/*
+	 * Anything mentioned in the expressions.  We must ignore the column
+	 * references, which will depend on the table itself; there is no
+	 * separate partition key object.
+	 */
+	if (partexprs)
+		recordDependencyOnSingleRelExpr(&myself,
+										(Node *) partexprs,
+										RelationGetRelid(rel),
+										DEPENDENCY_NORMAL,
+										DEPENDENCY_AUTO, true);
+
+	/*
+	 * We must invalidate the relcache so that the next
+	 * CommandCounterIncrement() will cause the same to be rebuilt using the
+	 * information in just created catalog entry.
+	 */
+	CacheInvalidateRelcache(rel);
+}
+
+/*
+ * RemovePartitionKeyByRelId
+ *		Delete the pg_partitioned_table row that describes relid's partition key
+ *
+ * The row is located through the PARTRELID syscache; an error is raised if
+ * it cannot be found.
+ */
+void
+RemovePartitionKeyByRelId(Oid relid)
+{
+    Relation    partkeyCatalog = heap_open(PartitionedRelationId,
+                                           RowExclusiveLock);
+    HeapTuple   keyTuple = SearchSysCache1(PARTRELID,
+                                           ObjectIdGetDatum(relid));
+
+    if (!HeapTupleIsValid(keyTuple))
+        elog(ERROR, "cache lookup failed for partition key of relation %u",
+             relid);
+
+    /* Delete the row while the cache reference is still held */
+    simple_heap_delete(partkeyCatalog, &keyTuple->t_self);
+    ReleaseSysCache(keyTuple);
+
+    heap_close(partkeyCatalog, RowExclusiveLock);
+}
+
+/*
+ * StorePartitionBound
+ *		Update pg_class tuple of rel to store the partition bound and set
+ *		relispartition to true
+ *
+ * 'rel' is the relation whose pg_class row is updated.  'bound' is the
+ * partition bound node; it is stored in relpartbound as the text form
+ * produced by nodeToString().
+ *
+ * The row must not already be marked as a partition (verified only in
+ * assert-enabled builds).  An error is raised if the pg_class row cannot be
+ * found.
+ */
+void
+StorePartitionBound(Relation rel, Node *bound)
+{
+    Relation    classRel;
+    HeapTuple   tuple,
+                newtuple;
+    Datum       new_val[Natts_pg_class];
+    bool        new_null[Natts_pg_class],
+                new_repl[Natts_pg_class];
+
+    /* Update pg_class tuple */
+    classRel = heap_open(RelationRelationId, RowExclusiveLock);
+    tuple = SearchSysCacheCopy1(RELOID,
+                                ObjectIdGetDatum(RelationGetRelid(rel)));
+
+    /* Don't dereference the tuple below unless the lookup succeeded */
+    if (!HeapTupleIsValid(tuple))
+        elog(ERROR, "cache lookup failed for relation %u",
+             RelationGetRelid(rel));
+
+#ifdef USE_ASSERT_CHECKING
+    {
+        Form_pg_class classForm;
+        bool        isnull;
+
+        classForm = (Form_pg_class) GETSTRUCT(tuple);
+        Assert(!classForm->relispartition);
+        (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound,
+                               &isnull);
+        Assert(isnull);
+    }
+#endif
+
+    /* Fill in relpartbound value; it is the only column being replaced */
+    memset(new_val, 0, sizeof(new_val));
+    memset(new_null, false, sizeof(new_null));
+    memset(new_repl, false, sizeof(new_repl));
+    new_val[Anum_pg_class_relpartbound - 1] = CStringGetTextDatum(nodeToString(bound));
+    new_null[Anum_pg_class_relpartbound - 1] = false;
+    new_repl[Anum_pg_class_relpartbound - 1] = true;
+    newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+                                 new_val, new_null, new_repl);
+
+    /* Also set the flag */
+    ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = true;
+    simple_heap_update(classRel, &newtuple->t_self, newtuple);
+    CatalogUpdateIndexes(classRel, newtuple);
+    heap_freetuple(newtuple);
+    heap_close(classRel, RowExclusiveLock);
+}
(Node *) indexInfo->ii_Expressions,
heapRelationId,
DEPENDENCY_NORMAL,
- DEPENDENCY_AUTO);
+ DEPENDENCY_AUTO, false);
}
/* Store dependencies on anything mentioned in predicate */
(Node *) indexInfo->ii_Predicate,
heapRelationId,
DEPENDENCY_NORMAL,
- DEPENDENCY_AUTO);
+ DEPENDENCY_AUTO, false);
}
}
else
RelationGetRelationName(relation))));
break;
case OBJECT_TABLE:
- if (relation->rd_rel->relkind != RELKIND_RELATION)
+ if (relation->rd_rel->relkind != RELKIND_RELATION &&
+ relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
switch (relForm->relkind)
{
case RELKIND_RELATION:
+ case RELKIND_PARTITIONED_TABLE:
appendStringInfo(buffer, _("table %s"),
relname);
break;
switch (relForm->relkind)
{
case RELKIND_RELATION:
+ case RELKIND_PARTITIONED_TABLE:
appendStringInfoString(buffer, "table");
break;
case RELKIND_INDEX:
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * partition.c
+ * Partitioning related data structures and functions.
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/catalog/partition.c
+ *
+ *-------------------------------------------------------------------------
+*/
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/nbtree.h"
+#include "access/sysattr.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaddress.h"
+#include "catalog/partition.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/parsenodes.h"
+#include "optimizer/clauses.h"
+#include "optimizer/planmain.h"
+#include "optimizer/var.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/lmgr.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/memutils.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/ruleutils.h"
+#include "utils/syscache.h"
+
+/*
+ * Information about bounds of a partitioned relation
+ *
+ * A list partition datum that is known to be NULL is never put into the
+ * datums array. Instead, it is tracked using has_null and null_index fields.
+ *
+ * In the case of range partitioning, ndatums will typically be far less than
+ * 2 * nparts, because a partition's upper bound and the next partition's lower
+ * bound are the same in most common cases, and we only store one of them.
+ *
+ * In the case of list partitioning, the indexes array stores one entry for
+ * every datum, which is the index of the partition that accepts a given datum.
+ * In case of range partitioning, it stores one entry per distinct range
+ * datum, which is the index of the partition for which a given datum
+ * is an upper bound.
+ */
+
+/*
+ * Ternary value to represent what's contained in a range bound datum.
+ *
+ * One such value accompanies each element of a range bound.  When the
+ * relcache bound info is built, a datum is copied only for elements marked
+ * RANGE_DATUM_FINITE.
+ */
+typedef enum RangeDatumContent
+{
+ RANGE_DATUM_FINITE = 0, /* actual datum stored elsewhere */
+ RANGE_DATUM_NEG_INF, /* negative infinity */
+ RANGE_DATUM_POS_INF /* positive infinity */
+} RangeDatumContent;
+
+typedef struct PartitionBoundInfoData
+{
+ char strategy; /* list or range bounds? */
+ int ndatums; /* Length of the datums array that follows */
+ Datum **datums; /* Array of datum-tuples with key->partnatts
+ * datums each */
+ RangeDatumContent **content;/* what's contained in each range bound datum?
+ * (see the above enum); NULL for list
+ * partitioned tables */
+ int *indexes; /* Partition indexes; one entry per member of
+ * the datums array (plus one if range
+ * partitioned table) */
+ bool has_null; /* Is there a null-accepting partition? false
+ * for range partitioned tables */
+ int null_index; /* Index of the null-accepting partition; -1
+ * for range partitioned tables */
+} PartitionBoundInfoData;
+
+/*
+ * When qsort'ing partition bounds after reading from the catalog, each bound
+ * is represented with one of the following structs.
+ */
+
+/*
+ * One value coming from some (index'th) list partition.
+ *
+ * Only non-null list values are represented this way; a null value is
+ * tracked separately while the partition descriptor is built.
+ */
+typedef struct PartitionListValue
+{
+ int index; /* position of the owning partition's bound spec */
+ Datum value; /* the list value itself */
+} PartitionListValue;
+
+/*
+ * One bound of a range partition.
+ *
+ * Instances are created by make_one_range_bound(); callers pass the
+ * partition's position as the index, or -1 for a bound that does not belong
+ * to an existing partition yet.
+ */
+typedef struct PartitionRangeBound
+{
+ int index; /* see above */
+ Datum *datums; /* range bound datums */
+ RangeDatumContent *content; /* what's contained in each datum? */
+ bool lower; /* this is the lower (vs upper) bound */
+} PartitionRangeBound;
+
+static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
+ void *arg);
+static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
+ void *arg);
+
+static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
+static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
+static Oid get_partition_operator(PartitionKey key, int col,
+ StrategyNumber strategy, bool *need_relabel);
+static List *generate_partition_qual(Relation rel, bool recurse);
+
+static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
+ List *datums, bool lower);
+static int32 partition_rbound_cmp(PartitionKey key,
+ Datum *datums1, RangeDatumContent *content1, bool lower1,
+ PartitionRangeBound *b2);
+static int32 partition_rbound_datum_cmp(PartitionKey key,
+ Datum *rb_datums, RangeDatumContent *rb_content,
+ Datum *tuple_datums);
+
+static int32 partition_bound_cmp(PartitionKey key,
+ PartitionBoundInfo boundinfo,
+ int offset, void *probe, bool probe_is_bound);
+static int partition_bound_bsearch(PartitionKey key,
+ PartitionBoundInfo boundinfo,
+ void *probe, bool probe_is_bound, bool *is_equal);
+
+/* Support get_partition_for_tuple() */
+static void FormPartitionKeyDatum(PartitionDispatch pd,
+ TupleTableSlot *slot,
+ EState *estate,
+ Datum *values,
+ bool *isnull);
+
+/*
+ * RelationBuildPartitionDesc
+ *		Form rel's partition descriptor
+ *
+ * Not flushed from the cache by RelationClearRelation() unless changed because
+ * of addition or removal of partition.
+ *
+ * The descriptor is allocated in a new memory context, rel->rd_pdcxt, created
+ * as a child of CacheMemoryContext, and is stored in rel->rd_partdesc.
+ * Nothing is built if rel has no partition key yet.
+ *
+ * Children found in pg_inherits whose pg_class row is not yet marked
+ * relispartition are ignored.
+ */
+void
+RelationBuildPartitionDesc(Relation rel)
+{
+    List       *inhoids,
+               *partoids;
+    Oid        *oids = NULL;
+    List       *boundspecs = NIL;
+    ListCell   *cell;
+    int         i,
+                nparts;
+    PartitionKey key = RelationGetPartitionKey(rel);
+    PartitionDesc result;
+    MemoryContext oldcxt;
+
+    int         ndatums = 0;
+
+    /* List partitioning specific */
+    PartitionListValue **all_values = NULL;
+    bool        found_null = false;
+    int         null_index = -1;
+
+    /* Range partitioning specific */
+    PartitionRangeBound **rbounds = NULL;
+
+    /*
+     * The following could happen in situations where rel has a pg_class entry
+     * but not the pg_partitioned_table entry yet.
+     */
+    if (key == NULL)
+        return;
+
+    /* Get partition oids from pg_inherits */
+    inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
+
+    /* Collect bound spec nodes in a list */
+    partoids = NIL;
+    foreach(cell, inhoids)
+    {
+        Oid         inhrelid = lfirst_oid(cell);
+        HeapTuple   tuple;
+        Datum       datum;
+        bool        isnull;
+        Node       *boundspec;
+
+        tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(inhrelid));
+
+        /* The tuple is dereferenced below, so insist that it exists */
+        if (!HeapTupleIsValid(tuple))
+            elog(ERROR, "cache lookup failed for relation %u", inhrelid);
+
+        /*
+         * It is possible that the pg_class tuple of a partition has not been
+         * updated yet to set its relpartbound field.  The only case where
+         * this happens is when we open the parent relation to check using its
+         * partition descriptor that a new partition's bound does not overlap
+         * some existing partition.
+         */
+        if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
+        {
+            ReleaseSysCache(tuple);
+            continue;
+        }
+
+        datum = SysCacheGetAttr(RELOID, tuple,
+                                Anum_pg_class_relpartbound,
+                                &isnull);
+        Assert(!isnull);
+        boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
+        boundspecs = lappend(boundspecs, boundspec);
+        partoids = lappend_oid(partoids, inhrelid);
+        ReleaseSysCache(tuple);
+    }
+
+    nparts = list_length(partoids);
+
+    if (nparts > 0)
+    {
+        oids = (Oid *) palloc(nparts * sizeof(Oid));
+        i = 0;
+        foreach(cell, partoids)
+            oids[i++] = lfirst_oid(cell);
+
+        /* Convert from node to the internal representation */
+        if (key->strategy == PARTITION_STRATEGY_LIST)
+        {
+            List       *non_null_values = NIL;
+
+            /*
+             * Create a unified list of non-null values across all partitions.
+             */
+            i = 0;
+            found_null = false;
+            null_index = -1;
+            foreach(cell, boundspecs)
+            {
+                ListCell   *c;
+                PartitionBoundSpec *spec = lfirst(cell);
+
+                if (spec->strategy != PARTITION_STRATEGY_LIST)
+                    elog(ERROR, "invalid strategy in partition bound spec");
+
+                foreach(c, spec->listdatums)
+                {
+                    Const      *val = lfirst(c);
+                    PartitionListValue *list_value = NULL;
+
+                    if (!val->constisnull)
+                    {
+                        list_value = (PartitionListValue *)
+                            palloc0(sizeof(PartitionListValue));
+                        list_value->index = i;
+                        list_value->value = val->constvalue;
+                    }
+                    else
+                    {
+                        /*
+                         * Never put a null into the values array, flag
+                         * instead for the code further down below where we
+                         * construct the actual relcache struct.
+                         */
+                        if (found_null)
+                            elog(ERROR, "found null more than once");
+                        found_null = true;
+                        null_index = i;
+                    }
+
+                    if (list_value)
+                        non_null_values = lappend(non_null_values,
+                                                  list_value);
+                }
+
+                i++;
+            }
+
+            ndatums = list_length(non_null_values);
+
+            /*
+             * Collect all list values in one array. Alongside the value, we
+             * also save the index of partition the value comes from.
+             */
+            all_values = (PartitionListValue **) palloc(ndatums *
+                                               sizeof(PartitionListValue *));
+            i = 0;
+            foreach(cell, non_null_values)
+            {
+                PartitionListValue *src = lfirst(cell);
+
+                all_values[i] = (PartitionListValue *)
+                    palloc(sizeof(PartitionListValue));
+                all_values[i]->value = src->value;
+                all_values[i]->index = src->index;
+                i++;
+            }
+
+            qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
+                      qsort_partition_list_value_cmp, (void *) key);
+        }
+        else if (key->strategy == PARTITION_STRATEGY_RANGE)
+        {
+            int         j,
+                        k;
+            PartitionRangeBound **all_bounds,
+                       *prev;
+            bool       *distinct_indexes;
+
+            all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
+                                              sizeof(PartitionRangeBound *));
+            distinct_indexes = (bool *) palloc(2 * nparts * sizeof(bool));
+
+            /*
+             * Create a unified list of range bounds across all the
+             * partitions.
+             */
+            i = j = 0;
+            foreach(cell, boundspecs)
+            {
+                PartitionBoundSpec *spec = lfirst(cell);
+                PartitionRangeBound *lower,
+                           *upper;
+
+                if (spec->strategy != PARTITION_STRATEGY_RANGE)
+                    elog(ERROR, "invalid strategy in partition bound spec");
+
+                lower = make_one_range_bound(key, i, spec->lowerdatums,
+                                             true);
+                upper = make_one_range_bound(key, i, spec->upperdatums,
+                                             false);
+                all_bounds[j] = lower;
+                all_bounds[j + 1] = upper;
+                j += 2;
+                i++;
+            }
+            Assert(j == 2 * nparts);
+
+            /* Sort all the bounds in ascending order */
+            qsort_arg(all_bounds, 2 * nparts,
+                      sizeof(PartitionRangeBound *),
+                      qsort_partition_rbound_cmp,
+                      (void *) key);
+
+            /*
+             * Count the number of distinct bounds to allocate an array of
+             * that size.
+             */
+            ndatums = 0;
+            prev = NULL;
+            for (i = 0; i < 2 * nparts; i++)
+            {
+                PartitionRangeBound *cur = all_bounds[i];
+                bool        is_distinct = false;
+
+                /* Is the current bound distinct from the previous one? */
+                for (j = 0; j < key->partnatts; j++)
+                {
+                    Datum       cmpval;
+
+                    if (prev == NULL)
+                    {
+                        is_distinct = true;
+                        break;
+                    }
+
+                    /*
+                     * If either of them has an infinite element, we can't
+                     * equate them.  Even when both are infinite, they'd have
+                     * opposite signs, because only one of cur and prev is a
+                     * lower bound.
+                     */
+                    if (cur->content[j] != RANGE_DATUM_FINITE ||
+                        prev->content[j] != RANGE_DATUM_FINITE)
+                    {
+                        is_distinct = true;
+                        break;
+                    }
+                    cmpval = FunctionCall2Coll(&key->partsupfunc[j],
+                                               key->partcollation[j],
+                                               cur->datums[j],
+                                               prev->datums[j]);
+                    if (DatumGetInt32(cmpval) != 0)
+                    {
+                        is_distinct = true;
+                        break;
+                    }
+                }
+
+                /*
+                 * Count the current bound if it is distinct from the previous
+                 * one.  Also, remember whether index i contains a distinct
+                 * bound that we'd like to put in the relcache array.
+                 */
+                if (is_distinct)
+                {
+                    distinct_indexes[i] = true;
+                    ndatums++;
+                }
+                else
+                    distinct_indexes[i] = false;
+
+                prev = cur;
+            }
+
+            /*
+             * Finally save them in an array from where they will be copied
+             * into the relcache.
+             */
+            rbounds = (PartitionRangeBound **) palloc(ndatums *
+                                              sizeof(PartitionRangeBound *));
+            k = 0;
+            for (i = 0; i < 2 * nparts; i++)
+            {
+                if (distinct_indexes[i])
+                    rbounds[k++] = all_bounds[i];
+            }
+            Assert(k == ndatums);
+        }
+        else
+            elog(ERROR, "unexpected partition strategy: %d",
+                 (int) key->strategy);
+    }
+
+    /* Now build the actual relcache partition descriptor */
+    rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
+                                          RelationGetRelationName(rel),
+                                          ALLOCSET_DEFAULT_SIZES);
+    oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
+
+    result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
+    result->nparts = nparts;
+    if (nparts > 0)
+    {
+        PartitionBoundInfo boundinfo;
+        int        *mapping;
+        int         next_index = 0;
+
+        result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
+
+        boundinfo = (PartitionBoundInfoData *)
+            palloc0(sizeof(PartitionBoundInfoData));
+        boundinfo->strategy = key->strategy;
+        boundinfo->ndatums = ndatums;
+        /* No null-accepting partition unless the list case says otherwise */
+        boundinfo->null_index = -1;
+        boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+
+        /* Initialize mapping array with invalid values */
+        mapping = (int *) palloc(sizeof(int) * nparts);
+        for (i = 0; i < nparts; i++)
+            mapping[i] = -1;
+
+        switch (key->strategy)
+        {
+            case PARTITION_STRATEGY_LIST:
+                {
+                    boundinfo->has_null = found_null;
+                    boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
+
+                    /*
+                     * Copy values.  Indexes of individual values are mapped
+                     * to canonical values so that they match for any two list
+                     * partitioned tables with same number of partitions and
+                     * same lists per partition.  One way to canonicalize is
+                     * to assign the index in all_values[] of the smallest
+                     * value of each partition, as the index of all of the
+                     * partition's values.
+                     */
+                    for (i = 0; i < ndatums; i++)
+                    {
+                        boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
+                        boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
+                                                        key->parttypbyval[0],
+                                                        key->parttyplen[0]);
+
+                        /* If the old index has no mapping, assign one */
+                        if (mapping[all_values[i]->index] == -1)
+                            mapping[all_values[i]->index] = next_index++;
+
+                        boundinfo->indexes[i] = mapping[all_values[i]->index];
+                    }
+
+                    /*
+                     * If null-accepting partition has no mapped index yet,
+                     * assign one.  This could happen if such partition
+                     * accepts only null and hence not covered in the above
+                     * loop which only handled non-null values.
+                     */
+                    if (found_null)
+                    {
+                        Assert(null_index >= 0);
+                        if (mapping[null_index] == -1)
+                            mapping[null_index] = next_index++;
+                    }
+
+                    /* All partitions must now have a valid mapping */
+                    Assert(next_index == nparts);
+
+                    if (found_null)
+                        boundinfo->null_index = mapping[null_index];
+                    else
+                        boundinfo->null_index = -1;
+                    break;
+                }
+
+            case PARTITION_STRATEGY_RANGE:
+                {
+                    boundinfo->content = (RangeDatumContent **) palloc(ndatums *
+                                                sizeof(RangeDatumContent *));
+                    boundinfo->indexes = (int *) palloc((ndatums + 1) *
+                                                        sizeof(int));
+
+                    for (i = 0; i < ndatums; i++)
+                    {
+                        int         j;
+
+                        boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
+                                                                sizeof(Datum));
+                        boundinfo->content[i] = (RangeDatumContent *)
+                            palloc(key->partnatts *
+                                   sizeof(RangeDatumContent));
+                        for (j = 0; j < key->partnatts; j++)
+                        {
+                            if (rbounds[i]->content[j] == RANGE_DATUM_FINITE)
+                                boundinfo->datums[i][j] =
+                                    datumCopy(rbounds[i]->datums[j],
+                                              key->parttypbyval[j],
+                                              key->parttyplen[j]);
+                            /* Remember, we are storing the tri-state value. */
+                            boundinfo->content[i][j] = rbounds[i]->content[j];
+                        }
+
+                        /*
+                         * There is no mapping for invalid indexes.
+                         *
+                         * Any lower bounds in the rbounds array have invalid
+                         * indexes assigned, because the values between the
+                         * previous bound (if there is one) and this (lower)
+                         * bound are not part of the range of any existing
+                         * partition.
+                         */
+                        if (rbounds[i]->lower)
+                            boundinfo->indexes[i] = -1;
+                        else
+                        {
+                            int         orig_index = rbounds[i]->index;
+
+                            /* If the old index has no mapping, assign one */
+                            if (mapping[orig_index] == -1)
+                                mapping[orig_index] = next_index++;
+
+                            boundinfo->indexes[i] = mapping[orig_index];
+                        }
+                    }
+
+                    /* Extra trailing entry: nothing lies above the last bound */
+                    boundinfo->indexes[i] = -1;
+                    break;
+                }
+
+            default:
+                elog(ERROR, "unexpected partition strategy: %d",
+                     (int) key->strategy);
+        }
+
+        result->boundinfo = boundinfo;
+
+        /*
+         * Now assign OIDs from the original array into mapped indexes of the
+         * result array.  Order of OIDs in the former is defined by the
+         * catalog scan that retrieved them, whereas that in the latter is
+         * defined by canonicalized representation of the list values or the
+         * range bounds.
+         */
+        for (i = 0; i < nparts; i++)
+            result->oids[mapping[i]] = oids[i];
+        pfree(mapping);
+    }
+
+    MemoryContextSwitchTo(oldcxt);
+    rel->rd_partdesc = result;
+}
+
+/*
+ * Are two partition bound collections logically equal?
+ *
+ * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
+ * This is also useful when b1 and b2 are bound collections of two separate
+ * relations, respectively, because PartitionBoundInfo is a canonical
+ * representation of partition bounds.
+ *
+ * 'key' supplies the per-column comparison functions and collations used to
+ * compare datums.  Returns true only if strategy, number of datums, null
+ * handling, every datum and every partition index agree.
+ */
+bool
+partition_bounds_equal(PartitionKey key,
+                       PartitionBoundInfo b1, PartitionBoundInfo b2)
+{
+    int         i;
+
+    if (b1->strategy != b2->strategy)
+        return false;
+
+    if (b1->ndatums != b2->ndatums)
+        return false;
+
+    if (b1->has_null != b2->has_null)
+        return false;
+
+    if (b1->null_index != b2->null_index)
+        return false;
+
+    for (i = 0; i < b1->ndatums; i++)
+    {
+        int         j;
+
+        for (j = 0; j < key->partnatts; j++)
+        {
+            int32       cmpval;
+
+            /* Range partitions can have infinite datums */
+            if (b1->content != NULL)
+            {
+                if (b1->content[i][j] != b2->content[i][j])
+                    return false;
+
+                /*
+                 * No datum is stored for an infinite element, so the slot
+                 * must not be handed to the comparison function; matching
+                 * content values are all there is to compare.
+                 */
+                if (b1->content[i][j] != RANGE_DATUM_FINITE)
+                    continue;
+            }
+
+            cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[j],
+                                                     key->partcollation[j],
+                                                     b1->datums[i][j],
+                                                     b2->datums[i][j]));
+            if (cmpval != 0)
+                return false;
+        }
+
+        if (b1->indexes[i] != b2->indexes[i])
+            return false;
+    }
+
+    /* There are ndatums+1 indexes in case of range partitions */
+    if (key->strategy == PARTITION_STRATEGY_RANGE &&
+        b1->indexes[i] != b2->indexes[i])
+        return false;
+
+    return true;
+}
+
+/*
+ * check_new_partition_bound
+ *
+ * Checks if the new partition's bound overlaps any of the existing partitions
+ * of parent. Also performs additional checks as necessary per strategy.
+ *
+ * 'relname' is the name of the new partition and is used only in the error
+ * message. 'parent' supplies the partition key and the partition descriptor
+ * that the new bound is checked against. 'bound' is treated as a
+ * PartitionBoundSpec.
+ *
+ * Reports an error if a range bound describes an empty range, or if the new
+ * bound overlaps an existing partition; returns normally otherwise.
+ */
+void
+check_new_partition_bound(char *relname, Relation parent, Node *bound)
+{
+ PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
+ PartitionKey key = RelationGetPartitionKey(parent);
+ PartitionDesc partdesc = RelationGetPartitionDesc(parent);
+ ParseState *pstate = make_parsestate(NULL);
+ int with = -1;
+ bool overlap = false;
+
+ switch (key->strategy)
+ {
+ case PARTITION_STRATEGY_LIST:
+ {
+ Assert(spec->strategy == PARTITION_STRATEGY_LIST);
+
+ if (partdesc->nparts > 0)
+ {
+ PartitionBoundInfo boundinfo = partdesc->boundinfo;
+ ListCell *cell;
+
+ Assert(boundinfo &&
+ boundinfo->strategy == PARTITION_STRATEGY_LIST &&
+ (boundinfo->ndatums > 0 || boundinfo->has_null));
+
+ foreach(cell, spec->listdatums)
+ {
+ Const *val = lfirst(cell);
+
+ if (!val->constisnull)
+ {
+ int offset;
+ bool equal;
+
+ offset = partition_bound_bsearch(key, boundinfo,
+ &val->constvalue,
+ true, &equal);
+ if (offset >= 0 && equal)
+ {
+ overlap = true;
+ with = boundinfo->indexes[offset];
+ break;
+ }
+ }
+ else if (boundinfo->has_null)
+ {
+ overlap = true;
+ with = boundinfo->null_index;
+ break;
+ }
+ }
+ }
+
+ break;
+ }
+
+ case PARTITION_STRATEGY_RANGE:
+ {
+ PartitionRangeBound *lower,
+ *upper;
+
+ Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
+ lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
+ upper = make_one_range_bound(key, -1, spec->upperdatums, false);
+
+ /*
+ * First check if the resulting range would be empty with
+ * specified lower and upper bounds
+ */
+ if (partition_rbound_cmp(key, lower->datums, lower->content, true,
+ upper) >= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot create range partition with empty range"),
+ parser_errposition(pstate, spec->location)));
+
+ if (partdesc->nparts > 0)
+ {
+ PartitionBoundInfo boundinfo = partdesc->boundinfo;
+ int off1,
+ off2;
+ bool equal = false;
+
+ Assert(boundinfo && boundinfo->ndatums > 0 &&
+ boundinfo->strategy == PARTITION_STRATEGY_RANGE);
+
+ /*
+ * Find the greatest index of a range bound that is less
+ * than or equal with the new lower bound.
+ */
+ off1 = partition_bound_bsearch(key, boundinfo, lower, true,
+ &equal);
+
+ /*
+ * If equal has been set to true, that means the new lower
+ * bound is found to be equal with the bound at off1,
+ * which clearly means an overlap with the partition at
+ * index off1+1.
+ *
+ * Otherwise, check if there is a "gap" that could be
+ * occupied by the new partition. In case of a gap, the
+ * new upper bound should not cross past the upper
+ * boundary of the gap, that is, off2 == off1 should be
+ * true.
+ *
+ * NOTE(review): when off1 != off2, indexes[off2 + 1] may
+ * itself be -1 (another gap), which would trip the
+ * Assert(with >= 0) below -- confirm.
+ */
+ if (!equal && boundinfo->indexes[off1 + 1] < 0)
+ {
+ off2 = partition_bound_bsearch(key, boundinfo, upper,
+ true, &equal);
+
+ if (equal || off1 != off2)
+ {
+ overlap = true;
+ with = boundinfo->indexes[off2 + 1];
+ }
+ }
+ else
+ {
+ overlap = true;
+ with = boundinfo->indexes[off1 + 1];
+ }
+ }
+
+ break;
+ }
+
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) key->strategy);
+ }
+
+ if (overlap)
+ {
+ Assert(with >= 0);
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("partition \"%s\" would overlap partition \"%s\"",
+ relname, get_rel_name(partdesc->oids[with])),
+ parser_errposition(pstate, spec->location)));
+ }
+}
+
+/*
+ * get_partition_parent
+ *
+ * Returns inheritance parent of a partition by scanning pg_inherits
+ *
+ * Note: Because this function assumes that the relation whose OID is passed
+ * as an argument will have precisely one parent, it should only be called
+ * when it is known that the relation is a partition.
+ *
+ * The scan looks for the pg_inherits row with inhrelid = relid and
+ * inhseqno = 1.  An error is raised if no such row exists.
+ */
+Oid
+get_partition_parent(Oid relid)
+{
+    Form_pg_inherits form;
+    Relation    catalogRelation;
+    SysScanDesc scan;
+    ScanKeyData key[2];
+    HeapTuple   tuple;
+    Oid         result;
+
+    catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
+
+    ScanKeyInit(&key[0],
+                Anum_pg_inherits_inhrelid,
+                BTEqualStrategyNumber, F_OIDEQ,
+                ObjectIdGetDatum(relid));
+    ScanKeyInit(&key[1],
+                Anum_pg_inherits_inhseqno,
+                BTEqualStrategyNumber, F_INT4EQ,
+                Int32GetDatum(1));
+
+    scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
+                              NULL, 2, key);
+
+    tuple = systable_getnext(scan);
+
+    /*
+     * Fail cleanly rather than dereferencing a missing tuple; an Assert
+     * alone would leave non-assert builds to crash here.
+     */
+    if (!HeapTupleIsValid(tuple))
+        elog(ERROR, "could not find tuple for parent of relation %u", relid);
+
+    form = (Form_pg_inherits) GETSTRUCT(tuple);
+    result = form->inhparent;
+
+    systable_endscan(scan);
+    heap_close(catalogRelation, AccessShareLock);
+
+    return result;
+}
+
+/*
+ * get_qual_from_partbound
+ *		Build the partition constraint implied by a partition bound node
+ *
+ * The expressions are first generated against the parent's partition key and
+ * then have their Vars renumbered to the attribute numbers of 'rel', matching
+ * columns by name.  The result is a list of executable expressions.
+ */
+List *
+get_qual_from_partbound(Relation rel, Relation parent, Node *bound)
+{
+    PartitionBoundSpec *boundspec = (PartitionBoundSpec *) bound;
+    PartitionKey partkey = RelationGetPartitionKey(parent);
+    TupleDesc   parentDesc = RelationGetDescr(parent);
+    int         nattrs = parentDesc->natts;
+    List       *qual = NIL;
+    AttrNumber *attmap;
+    bool        saw_whole_row;
+    int         idx;
+
+    Assert(partkey != NULL);
+
+    /* Generate the strategy-specific expressions */
+    if (partkey->strategy == PARTITION_STRATEGY_LIST)
+    {
+        Assert(boundspec->strategy == PARTITION_STRATEGY_LIST);
+        qual = get_qual_for_list(partkey, boundspec);
+    }
+    else if (partkey->strategy == PARTITION_STRATEGY_RANGE)
+    {
+        Assert(boundspec->strategy == PARTITION_STRATEGY_RANGE);
+        qual = get_qual_for_range(partkey, boundspec);
+    }
+    else
+        elog(ERROR, "unexpected partition strategy: %d",
+             (int) partkey->strategy);
+
+    /*
+     * The Vars produced above carry the parent's physical attribute numbers,
+     * since that is what the key records.  A partition's physical attribute
+     * numbers may differ, so build a parent-to-partition map by column name.
+     * Entries for dropped parent columns are left as zero.
+     */
+    attmap = (AttrNumber *) palloc0(nattrs * sizeof(AttrNumber));
+    for (idx = 0; idx < nattrs; idx++)
+    {
+        Form_pg_attribute att = parentDesc->attrs[idx];
+
+        if (!att->attisdropped)
+            attmap[idx] = get_attnum(RelationGetRelid(rel),
+                                     NameStr(att->attname));
+    }
+
+    qual = (List *) map_variable_attnos((Node *) qual,
+                                        1, 0,
+                                        attmap,
+                                        nattrs,
+                                        &saw_whole_row);
+
+    /* there can never be a whole-row reference here */
+    if (saw_whole_row)
+        elog(ERROR, "unexpected whole-row reference found in partition key");
+
+    return qual;
+}
+
+/*
+ * RelationGetPartitionQual
+ *
+ * Returns a list of partition quals for rel, or NIL if rel is not marked as
+ * a partition.  'recurse' is passed through to generate_partition_qual().
+ */
+List *
+RelationGetPartitionQual(Relation rel, bool recurse)
+{
+    /* Relations that are not partitions have no partition quals */
+    return rel->rd_rel->relispartition
+        ? generate_partition_qual(rel, recurse)
+        : NIL;
+}
+
+/*
+ * Turn an array of OIDs with N elements into a list
+ *
+ * Appends arr[0..N-1] to 'list' in order, using lappend_oid().  The loop
+ * counter has a deliberately unusual name so that it cannot capture or shadow
+ * an identifier appearing in the macro arguments (callers commonly have a
+ * local named "i").
+ */
+#define OID_ARRAY_TO_LIST(arr, N, list) \
+    do\
+    {\
+        int oid_array_idx_;\
+        for (oid_array_idx_ = 0; oid_array_idx_ < (N); oid_array_idx_++)\
+            (list) = lappend_oid((list), (arr)[oid_array_idx_]);\
+    } while(0)
+
+/*
+ * RelationGetPartitionDispatchInfo
+ * Returns information necessary to route tuples down a partition tree
+ *
+ * All the partitions will be locked with lockmode, unless it is NoLock.
+ * A list of the OIDs of all the leaf partition of rel is returned in
+ * *leaf_part_oids.
+ *
+ * The result is a palloc'd array of *num_parted PartitionDispatch objects,
+ * one per partitioned table in the tree, the first of which is for rel
+ * itself. In each object, indexes[] has one entry per partition: a
+ * non-negative entry is the partition's position in *leaf_part_oids, while a
+ * negative entry marks a partition that is itself partitioned.
+ *
+ * Partitioned tables other than rel are opened here and left open.
+ */
+PartitionDispatch *
+RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
+ int *num_parted, List **leaf_part_oids)
+{
+ PartitionDesc rootpartdesc = RelationGetPartitionDesc(rel);
+ PartitionDispatchData **pd;
+ List *all_parts = NIL,
+ *parted_rels;
+ ListCell *lc;
+ int i,
+ k;
+
+ /*
+ * Lock partitions and make a list of the partitioned ones to prepare
+ * their PartitionDispatch objects below.
+ *
+ * Cannot use find_all_inheritors() here, because then the order of OIDs
+ * in parted_rels list would be unknown, which does not help, because
+ * we assign indexes within individual PartitionDispatch in an order that
+ * is predetermined (determined by the order of OIDs in individual
+ * partition descriptors).
+ */
+ *num_parted = 1;
+ parted_rels = list_make1(rel);
+ OID_ARRAY_TO_LIST(rootpartdesc->oids, rootpartdesc->nparts, all_parts);
+ foreach(lc, all_parts)
+ {
+ Relation partrel = heap_open(lfirst_oid(lc), lockmode);
+ PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
+
+ /*
+ * If this partition is a partitioned table, add its children to the
+ * end of the list, so that they are processed as well.
+ */
+ if (partdesc)
+ {
+ (*num_parted)++;
+ parted_rels = lappend(parted_rels, partrel);
+ OID_ARRAY_TO_LIST(partdesc->oids, partdesc->nparts, all_parts);
+ }
+ else
+ heap_close(partrel, NoLock);
+
+ /*
+ * We keep the partitioned ones open until we're done using the
+ * information being collected here (for example, see
+ * ExecEndModifyTable).
+ */
+ }
+
+ /* Generate PartitionDispatch objects for all partitioned tables */
+ pd = (PartitionDispatchData **) palloc(*num_parted *
+ sizeof(PartitionDispatchData *));
+ *leaf_part_oids = NIL;
+ i = k = 0;
+ foreach(lc, parted_rels)
+ {
+ Relation partrel = lfirst(lc);
+ PartitionKey partkey = RelationGetPartitionKey(partrel);
+ PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
+ int j,
+ m;
+
+ pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
+ pd[i]->reldesc = partrel;
+ pd[i]->key = partkey;
+ pd[i]->keystate = NIL;
+ pd[i]->partdesc = partdesc;
+ pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
+
+ m = 0;
+ for (j = 0; j < partdesc->nparts; j++)
+ {
+ Oid partrelid = partdesc->oids[j];
+
+ if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
+ {
+ *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
+ pd[i]->indexes[j] = k++;
+ }
+ else
+ {
+ /*
+ * We can assign indexes this way because of the way
+ * parted_rels has been generated.
+ *
+ * NOTE(review): the value is -(i + 1 + m), where m counts
+ * the partitioned children of this parent seen so far;
+ * presumably its negation is meant to be the child's
+ * position in pd[] -- confirm for trees deeper than two
+ * levels.
+ */
+ pd[i]->indexes[j] = -(i + 1 + m);
+ m++;
+ }
+ }
+ i++;
+ }
+
+ return pd;
+}
+
+/* Module-local functions */
+
+/*
+ * get_qual_for_list
+ *
+ * Returns a list of expressions to use as a list partition's constraint.
+ *
+ * Only the first partition key column is consulted.  With "keycol" standing
+ * for that column (or key expression), the result is either the two members
+ *		keycol IS NOT NULL
+ *		keycol = ANY (ARRAY[values])
+ * when the bound contains no NULL, or the single member
+ *		keycol IS NULL OR keycol = ANY (ARRAY[non-null values])
+ * when it does.
+ *
+ * spec->listdatums is left untouched: the non-null values are gathered into
+ * a separate list instead of deleting the NULL entries from the caller's
+ * bound specification in place.  The Const nodes themselves are shared with
+ * spec, not copied.
+ */
+static List *
+get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
+{
+	List	   *result;
+	List	   *arrelems = NIL;
+	ArrayExpr  *arr;
+	ScalarArrayOpExpr *opexpr;
+	NullTest   *nulltest;
+	ListCell   *cell;
+	Node	   *keyCol;
+	Oid			operoid;
+	bool		need_relabel,
+				list_has_null = false;
+
+	/* Left operand is either a simple Var or arbitrary expression */
+	if (key->partattrs[0] != 0)
+		keyCol = (Node *) makeVar(1,
+								  key->partattrs[0],
+								  key->parttypid[0],
+								  key->parttypmod[0],
+								  key->parttypcoll[0],
+								  0);
+	else
+		keyCol = (Node *) copyObject(linitial(key->partexprs));
+
+	/*
+	 * Separate out any NULL value in the list; it is handled by the NullTest
+	 * built below rather than being an element of the array.
+	 */
+	foreach(cell, spec->listdatums)
+	{
+		Const	   *val = (Const *) lfirst(cell);
+
+		if (val->constisnull)
+			list_has_null = true;
+		else
+			arrelems = lappend(arrelems, val);
+	}
+
+	/*
+	 * Gin up either a col IS NOT NULL test that will be AND'd with the other
+	 * expression, or a col IS NULL test that will be OR'd with it, depending
+	 * on whether this partition accepts NULL.  Note that the test is applied
+	 * to the key before any relabeling done below.
+	 */
+	nulltest = makeNode(NullTest);
+	nulltest->arg = (Expr *) keyCol;
+	nulltest->nulltesttype = list_has_null ? IS_NULL : IS_NOT_NULL;
+	nulltest->argisrow = false;
+	nulltest->location = -1;
+
+	/* Right operand is an ArrayExpr containing this partition's values */
+	arr = makeNode(ArrayExpr);
+	arr->array_typeid = !type_is_array(key->parttypid[0])
+		? get_array_type(key->parttypid[0])
+		: key->parttypid[0];
+	arr->array_collid = key->parttypcoll[0];
+	arr->element_typeid = key->parttypid[0];
+	arr->elements = arrelems;
+	arr->multidims = false;
+	arr->location = -1;
+
+	/* Get the correct btree equality operator */
+	operoid = get_partition_operator(key, 0, BTEqualStrategyNumber,
+									 &need_relabel);
+	if (need_relabel || key->partcollation[0] != key->parttypcoll[0])
+		keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+										  key->partopcintype[0],
+										  -1,
+										  key->partcollation[0],
+										  COERCE_EXPLICIT_CAST);
+
+	/* Build leftop = ANY (rightop) */
+	opexpr = makeNode(ScalarArrayOpExpr);
+	opexpr->opno = operoid;
+	opexpr->opfuncid = get_opcode(operoid);
+	opexpr->useOr = true;
+	opexpr->inputcollid = key->partcollation[0];
+	opexpr->args = list_make2(keyCol, arr);
+	opexpr->location = -1;
+
+	if (list_has_null)
+	{
+		Expr	   *or;
+
+		or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
+		result = list_make1(or);
+	}
+	else
+		result = list_make2(nulltest, opexpr);
+
+	return result;
+}
+
+/*
+ * make_range_key_opclause
+ *
+ * Build (keyCol op val) for column "col" of a range partition key, wrapping
+ * keyCol in a RelabelType first if the operator lookup asked for it or if
+ * the partitioning collation differs from the column's type collation.
+ */
+static Expr *
+make_range_key_opclause(PartitionKey key, int col, Oid operoid,
+						bool need_relabel, Node *keyCol, Const *val)
+{
+	if (need_relabel || key->partcollation[col] != key->parttypcoll[col])
+		keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+										  key->partopcintype[col],
+										  -1,
+										  key->partcollation[col],
+										  COERCE_EXPLICIT_CAST);
+
+	return make_opclause(operoid,
+						 BOOLOID,
+						 false,
+						 (Expr *) keyCol,
+						 (Expr *) val,
+						 InvalidOid,
+						 key->partcollation[col]);
+}
+
+/*
+ * get_qual_for_range
+ *
+ * Get a list of OpExpr's to use as a range partition's constraint.
+ *
+ * Key columns are visited in order.  For every expression (non-Var) key an
+ * IS NOT NULL test is emitted.  While a column's lower and upper datums are
+ * both finite and compare equal, (keyCol = lower_val) is emitted and the next
+ * column is considered.  At the first column where that is not the case,
+ * (keyCol >= lower_val) and/or (keyCol < upper_val) are emitted for whichever
+ * datums are finite, and processing stops.
+ *
+ * Each emitted operator clause relabels the bare key expression on its own,
+ * so that the upper-bound clause does not end up with a RelabelType stacked
+ * on top of the one already built for the lower-bound clause.
+ */
+static List *
+get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
+{
+	List	   *result = NIL;
+	ListCell   *cell1,
+			   *cell2,
+			   *partexprs_item;
+	int			i;
+
+	/*
+	 * Iterate over columns of the key, emitting an OpExpr for each using the
+	 * corresponding lower and upper datums as constant operands.
+	 */
+	i = 0;
+	partexprs_item = list_head(key->partexprs);
+	forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
+	{
+		PartitionRangeDatum *ldatum = lfirst(cell1),
+				   *udatum = lfirst(cell2);
+		Node	   *keyCol;
+		Const	   *lower_val = NULL,
+				   *upper_val = NULL;
+		bool		need_relabel = false;
+		Oid			operoid;
+
+		/* Left operand */
+		if (key->partattrs[i] != 0)
+		{
+			keyCol = (Node *) makeVar(1,
+									  key->partattrs[i],
+									  key->parttypid[i],
+									  key->parttypmod[i],
+									  key->parttypcoll[i],
+									  0);
+		}
+		else
+		{
+			keyCol = (Node *) copyObject(lfirst(partexprs_item));
+			partexprs_item = lnext(partexprs_item);
+		}
+
+		/*
+		 * Emit a IS NOT NULL expression for non-Var keys, because whereas
+		 * simple attributes are covered by NOT NULL constraints, expression
+		 * keys are still nullable which is not acceptable in case of range
+		 * partitioning.
+		 */
+		if (!IsA(keyCol, Var))
+		{
+			NullTest   *nulltest = makeNode(NullTest);
+
+			nulltest->arg = (Expr *) keyCol;
+			nulltest->nulltesttype = IS_NOT_NULL;
+			nulltest->argisrow = false;
+			nulltest->location = -1;
+			result = lappend(result, nulltest);
+		}
+
+		/*
+		 * Stop at this column if either of lower or upper datum is infinite,
+		 * but do emit an OpExpr for the non-infinite datum.
+		 */
+		if (!ldatum->infinite)
+			lower_val = (Const *) ldatum->value;
+		if (!udatum->infinite)
+			upper_val = (Const *) udatum->value;
+
+		/*
+		 * If lower_val and upper_val are both finite and happen to be equal,
+		 * emit only (keyCol = lower_val) for this column, because all rows in
+		 * this partition could only ever contain this value (ie, lower_val)
+		 * in the current partitioning column.  We must consider further
+		 * columns because the above condition does not fully constrain the
+		 * rows of this partition.
+		 */
+		if (lower_val && upper_val)
+		{
+			EState	   *estate;
+			MemoryContext oldcxt;
+			Expr	   *test_expr;
+			ExprState  *test_exprstate;
+			Datum		test_result;
+			bool		isNull;
+
+			/* Get the correct btree equality operator for the test */
+			operoid = get_partition_operator(key, i, BTEqualStrategyNumber,
+											 &need_relabel);
+
+			/* Evaluate (lower_val = upper_val) in a throwaway executor state */
+			estate = CreateExecutorState();
+			oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
+			test_expr = make_opclause(operoid,
+									  BOOLOID,
+									  false,
+									  (Expr *) lower_val,
+									  (Expr *) upper_val,
+									  InvalidOid,
+									  key->partcollation[i]);
+			fix_opfuncids((Node *) test_expr);
+			test_exprstate = ExecInitExpr(test_expr, NULL);
+			test_result = ExecEvalExprSwitchContext(test_exprstate,
+											  GetPerTupleExprContext(estate),
+													&isNull, NULL);
+			MemoryContextSwitchTo(oldcxt);
+			FreeExecutorState(estate);
+
+			/* A null comparison result is treated as "not equal" */
+			if (!isNull && DatumGetBool(test_result))
+			{
+				/* This can never be, but it's better to make sure */
+				if (i == key->partnatts - 1)
+					elog(ERROR, "invalid range bound specification");
+
+				result = lappend(result,
+								 make_range_key_opclause(key, i, operoid,
+														 need_relabel,
+														 keyCol, lower_val));
+
+				/* Go over to consider the next column. */
+				i++;
+				continue;
+			}
+		}
+
+		/*
+		 * We can say here that lower_val != upper_val.  Emit expressions
+		 * (keyCol >= lower_val) and (keyCol < upper_val), then stop.
+		 */
+		if (lower_val)
+		{
+			operoid = get_partition_operator(key, i,
+											 BTGreaterEqualStrategyNumber,
+											 &need_relabel);
+			result = lappend(result,
+							 make_range_key_opclause(key, i, operoid,
+													 need_relabel,
+													 keyCol, lower_val));
+		}
+
+		if (upper_val)
+		{
+			operoid = get_partition_operator(key, i,
+											 BTLessStrategyNumber,
+											 &need_relabel);
+			result = lappend(result,
+							 make_range_key_opclause(key, i, operoid,
+													 need_relabel,
+													 keyCol, upper_val));
+		}
+
+		/*
+		 * We can stop at this column, because we would not have checked the
+		 * next column when routing a given row into this partition.
+		 */
+		break;
+	}
+
+	return result;
+}
+
+/*
+ * get_partition_operator
+ *
+ * Look up, in the operator family of partition key column "col", the
+ * operator implementing the given btree strategy, and return its OID.
+ *
+ * *need_relabel is set to tell the caller whether the non-Const operand must
+ * be wrapped in a RelabelType node (see below).  An error is raised if no
+ * suitable operator can be found.
+ */
+static Oid
+get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
+					   bool *need_relabel)
+{
+	Oid			opfamily = key->partopfamily[col];
+	Oid			coltype = key->parttypid[col];
+	Oid			found;
+
+	/*
+	 * The preferred operator is one that takes the column's own type on both
+	 * sides.
+	 */
+	found = get_opfamily_member(opfamily, coltype, coltype, strategy);
+
+	/*
+	 * Failing that, fall back to an operator of the same operator family
+	 * that takes the operator class's declared input type.  That is OK,
+	 * because the column's type is known to be binary-coercible to the
+	 * operator class input type (otherwise, the operator class in question
+	 * would not have been accepted as the partitioning operator class).  The
+	 * caller must however wrap the non-Const expression in a RelabelType
+	 * node to denote the implicit coercion, so that the resulting expression
+	 * structurally matches similarly processed expressions within the
+	 * optimizer.
+	 */
+	*need_relabel = !OidIsValid(found);
+	if (*need_relabel)
+	{
+		Oid			opcintype = key->partopcintype[col];
+
+		found = get_opfamily_member(opfamily, opcintype, opcintype, strategy);
+		if (!OidIsValid(found))
+			elog(ERROR, "could not find operator for partitioning");
+	}
+
+	return found;
+}
+
+/*
+ * generate_partition_qual
+ *
+ * Generate partition predicate from rel's partition bound expression
+ *
+ * If "recurse" is true and rel's parent is itself a partition, the parent's
+ * predicate (generated recursively) is placed in front of rel's own.
+ *
+ * rel's own predicate is cached in rel->rd_partcheck, which is stored in
+ * CacheMemoryContext to ensure it survives as long as the relcache entry.
+ * But we should be running in a less long-lived working context.  To avoid
+ * leaking cache memory if this routine fails partway through, we build in
+ * working memory and then copy the completed structure into cache memory.
+ * The returned list is always a copy living in the caller's context.
+ */
+static List *
+generate_partition_qual(Relation rel, bool recurse)
+{
+	HeapTuple	tuple;
+	MemoryContext oldcxt;
+	Datum		boundDatum;
+	bool		isnull;
+	Node	   *bound;
+	List	   *my_qual = NIL,
+			   *result = NIL;
+	Relation	parent;
+
+	/* Guard against stack overflow due to overly deep partition tree */
+	check_stack_depth();
+
+	/* Grab at least an AccessShareLock on the parent table */
+	parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
+					   AccessShareLock);
+
+	/* Quick copy */
+	if (rel->rd_partcheck)
+	{
+		if (parent->rd_rel->relispartition && recurse)
+			result = list_concat(generate_partition_qual(parent, true),
+								 copyObject(rel->rd_partcheck));
+		else
+			result = copyObject(rel->rd_partcheck);
+
+		heap_close(parent, AccessShareLock);
+		return result;
+	}
+
+	/* Get pg_class.relpartbound */
+	if (!rel->rd_rel->relispartition)	/* should not happen */
+		elog(ERROR, "relation \"%s\" has relispartition = false",
+			 RelationGetRelationName(rel));
+	tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
+	if (!HeapTupleIsValid(tuple))	/* don't pass a failed lookup along */
+		elog(ERROR, "cache lookup failed for relation %u",
+			 RelationGetRelid(rel));
+	boundDatum = SysCacheGetAttr(RELOID, tuple,
+								 Anum_pg_class_relpartbound,
+								 &isnull);
+	if (isnull)					/* should not happen */
+		elog(ERROR, "relation \"%s\" has relpartbound = null",
+			 RelationGetRelationName(rel));
+	bound = stringToNode(TextDatumGetCString(boundDatum));
+	ReleaseSysCache(tuple);
+
+	my_qual = get_qual_from_partbound(rel, parent, bound);
+
+	/* If requested, add parent's quals to the list (if any) */
+	if (parent->rd_rel->relispartition && recurse)
+	{
+		List	   *parent_check;
+
+		parent_check = generate_partition_qual(parent, true);
+		result = list_concat(parent_check, my_qual);
+	}
+	else
+		result = my_qual;
+
+	/* Save a copy of my_qual in the relcache */
+	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+	rel->rd_partcheck = copyObject(my_qual);
+	MemoryContextSwitchTo(oldcxt);
+
+	/* Keep the parent locked until commit */
+	heap_close(parent, NoLock);
+
+	return result;
+}
+
+/* ----------------
+ *		FormPartitionKeyDatum
+ *			Fill values[] and isnull[] with the partition key of a tuple.
+ *
+ *	pd				partition dispatch entry supplying the partition key
+ *					and caching the key expression state
+ *	slot			Heap tuple from which to extract partition key
+ *	estate			executor state for evaluating any partition key
+ *					expressions (must be non-NULL)
+ *	values			Array of partition key Datums (output area)
+ *	isnull			Array of is-null indicators (output area)
+ *
+ * the ecxt_scantuple slot of estate's per-tuple expr context must point to
+ * the heap tuple passed in.
+ * ----------------
+ */
+static void
+FormPartitionKeyDatum(PartitionDispatch pd,
+					  TupleTableSlot *slot,
+					  EState *estate,
+					  Datum *values,
+					  bool *isnull)
+{
+	PartitionKey key = pd->key;
+	ListCell   *next_expr;
+	int			keyno;
+
+	/* Build the expression evaluation state on first use */
+	if (key->partexprs != NIL && pd->keystate == NIL)
+	{
+		/* Check caller has set up context correctly */
+		Assert(estate != NULL &&
+			   GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
+
+		pd->keystate = (List *) ExecPrepareExpr((Expr *) key->partexprs,
+												estate);
+	}
+
+	next_expr = list_head(pd->keystate);
+	for (keyno = 0; keyno < key->partnatts; keyno++)
+	{
+		AttrNumber	attno = key->partattrs[keyno];
+
+		if (attno == 0)
+		{
+			/* Expression key: evaluate the next prepared expression */
+			if (next_expr == NULL)
+				elog(ERROR, "wrong number of partition key expressions");
+			values[keyno] =
+				ExecEvalExprSwitchContext((ExprState *) lfirst(next_expr),
+										  GetPerTupleExprContext(estate),
+										  &isnull[keyno],
+										  NULL);
+			next_expr = lnext(next_expr);
+		}
+		else
+		{
+			/* Plain column: fetch the value directly from the heap tuple */
+			values[keyno] = slot_getattr(slot, attno, &isnull[keyno]);
+		}
+	}
+
+	/* Every prepared expression must have been consumed */
+	if (next_expr != NULL)
+		elog(ERROR, "wrong number of partition key expressions");
+}
+
+/*
+ * get_partition_for_tuple
+ *		Finds a leaf partition for tuple contained in *slot
+ *
+ * pd is the array of PartitionDispatch objects for the partition tree, with
+ * pd[0] being the root.  Starting there, the tuple's partition key is formed
+ * and looked up at each level; a negative entry in a level's indexes[] array
+ * names the pd[] element of the next partitioned table to descend into.
+ *
+ * Returned value is the sequence number of the leaf partition thus found,
+ * or -1 if no leaf partition is found for the tuple.  *failed_at is set
+ * to the OID of the partitioned table whose partition was not found in
+ * the latter case.
+ *
+ * An error is raised if any range partition key column of the tuple is null.
+ */
+int
+get_partition_for_tuple(PartitionDispatch * pd,
+						TupleTableSlot *slot,
+						EState *estate,
+						Oid *failed_at)
+{
+	PartitionDispatch parent;
+	Datum		values[PARTITION_MAX_KEYS];
+	bool		isnull[PARTITION_MAX_KEYS];
+	int			cur_offset,
+				cur_index;
+	int			i;
+
+	/* start with the root partitioned table */
+	parent = pd[0];
+	while (true)
+	{
+		PartitionKey key = parent->key;
+		PartitionDesc partdesc = parent->partdesc;
+
+		/* Quick exit */
+		if (partdesc->nparts == 0)
+		{
+			*failed_at = RelationGetRelid(parent->reldesc);
+			return -1;
+		}
+
+		/* Extract partition key from tuple */
+		FormPartitionKeyDatum(parent, slot, estate, values, isnull);
+
+		if (key->strategy == PARTITION_STRATEGY_RANGE)
+		{
+			/* Disallow nulls in the range partition key of the tuple */
+			for (i = 0; i < key->partnatts; i++)
+				if (isnull[i])
+					ereport(ERROR,
+							(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+						errmsg("range partition key of row contains null")));
+		}
+
+		/*
+		 * A null key is acceptable only if a null-accepting list partition
+		 * exists; never feed a null Datum to the comparison function.
+		 */
+		cur_index = -1;
+		if (isnull[0])
+		{
+			if (partdesc->boundinfo->has_null)
+				cur_index = partdesc->boundinfo->null_index;
+		}
+		else
+		{
+			/* Else bsearch in partdesc->boundinfo */
+			bool		equal = false;
+
+			cur_offset = partition_bound_bsearch(key, partdesc->boundinfo,
+												 values, false, &equal);
+			switch (key->strategy)
+			{
+				case PARTITION_STRATEGY_LIST:
+					if (cur_offset >= 0 && equal)
+						cur_index = partdesc->boundinfo->indexes[cur_offset];
+					else
+						cur_index = -1;
+					break;
+
+				case PARTITION_STRATEGY_RANGE:
+
+					/*
+					 * Offset returned is such that the bound at offset is
+					 * found to be less or equal with the tuple.  So, the
+					 * bound at offset+1 would be the upper bound.
+					 */
+					cur_index = partdesc->boundinfo->indexes[cur_offset + 1];
+					break;
+
+				default:
+					elog(ERROR, "unexpected partition strategy: %d",
+						 (int) key->strategy);
+			}
+		}
+
+		/*
+		 * cur_index < 0 means we failed to find a partition of this parent.
+		 * cur_index >= 0 means we either found the leaf partition, or the
+		 * next parent to find a partition of.
+		 */
+		if (cur_index < 0)
+		{
+			*failed_at = RelationGetRelid(parent->reldesc);
+			return -1;
+		}
+		else if (parent->indexes[cur_index] < 0)
+			parent = pd[-parent->indexes[cur_index]];
+		else
+			break;
+	}
+
+	return parent->indexes[cur_index];
+}
+
+/*
+ * qsort_partition_list_value_cmp
+ *
+ * Sort comparator for list partition bound values.  a and b each point to a
+ * PartitionListValue pointer; arg is the PartitionKey, whose first column's
+ * support function and collation are used for the comparison.
+ */
+static int32
+qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
+{
+	const PartitionListValue *lhs = *(const PartitionListValue *const *) a;
+	const PartitionListValue *rhs = *(const PartitionListValue *const *) b;
+	PartitionKey key = (PartitionKey) arg;
+	Datum		cmp;
+
+	cmp = FunctionCall2Coll(&key->partsupfunc[0],
+							key->partcollation[0],
+							lhs->value, rhs->value);
+
+	return DatumGetInt32(cmp);
+}
+
+/*
+ * make_one_range_bound
+ *
+ * Build a palloc'd PartitionRangeBound from a list of PartitionRangeDatum
+ * elements.  "index" is stored in the bound as is, and "lower" tells whether
+ * this is a lower bound, which also decides whether an infinite datum stands
+ * for negative or positive infinity.  Made into a function because there are
+ * multiple sites that want to use this facility.
+ */
+static PartitionRangeBound *
+make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
+{
+	PartitionRangeBound *rbound;
+	ListCell   *lc;
+	int			col = 0;
+
+	rbound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
+	rbound->index = index;
+	rbound->lower = lower;
+	rbound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
+	rbound->content = (RangeDatumContent *) palloc0(key->partnatts *
+												   sizeof(RangeDatumContent));
+
+	foreach(lc, datums)
+	{
+		PartitionRangeDatum *rdatum = lfirst(lc);
+
+		if (rdatum->infinite)
+		{
+			/* No value to store; just record which infinity this is */
+			if (lower)
+				rbound->content[col] = RANGE_DATUM_NEG_INF;
+			else
+				rbound->content[col] = RANGE_DATUM_POS_INF;
+		}
+		else
+		{
+			Const	   *val = (Const *) rdatum->value;
+
+			rbound->content[col] = RANGE_DATUM_FINITE;
+
+			/* A finite range datum must not be null */
+			if (val->constisnull)
+				elog(ERROR, "invalid range bound datum");
+			rbound->datums[col] = val->constvalue;
+		}
+
+		col++;
+	}
+
+	return rbound;
+}
+
+/*
+ * Sort comparator for range bounds across all range partitions.  a and b
+ * each point to a PartitionRangeBound pointer; arg is the PartitionKey.
+ */
+static int32
+qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
+{
+	PartitionRangeBound *const *lhs = (PartitionRangeBound *const *) a;
+	PartitionRangeBound *const *rhs = (PartitionRangeBound *const *) b;
+
+	return partition_rbound_cmp((PartitionKey) arg,
+								(*lhs)->datums,
+								(*lhs)->content,
+								(*lhs)->lower,
+								*rhs);
+}
+
+/*
+ * partition_rbound_cmp
+ *
+ * Return for two range bounds whether the 1st one (specified in datums1,
+ * content1, and lower1) is <, =, > the bound specified in *b2; the result
+ * is negative, zero, or positive accordingly.
+ *
+ * Columns are compared in order.  Comparison stops at the first column where
+ * either side is infinite or where the finite values differ.
+ */
+static int32
+partition_rbound_cmp(PartitionKey key,
+					 Datum *datums1, RangeDatumContent *content1, bool lower1,
+					 PartitionRangeBound *b2)
+{
+	int32		cmpval = 0;		/* defined even if the loop body never runs */
+	int			i;
+	Datum	   *datums2 = b2->datums;
+	RangeDatumContent *content2 = b2->content;
+	bool		lower2 = b2->lower;
+
+	for (i = 0; i < key->partnatts; i++)
+	{
+		/*
+		 * First, handle cases involving infinity, which don't require
+		 * invoking the comparison proc.
+		 */
+		if (content1[i] != RANGE_DATUM_FINITE &&
+			content2[i] != RANGE_DATUM_FINITE)
+
+			/*
+			 * Both are infinity, so they are equal unless one is negative
+			 * infinity and other positive (or vice versa)
+			 */
+			return content1[i] == content2[i] ? 0
+				: (content1[i] < content2[i] ? -1 : 1);
+		else if (content1[i] != RANGE_DATUM_FINITE)
+			return content1[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
+		else if (content2[i] != RANGE_DATUM_FINITE)
+			return content2[i] == RANGE_DATUM_NEG_INF ? 1 : -1;
+
+		cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
+												 key->partcollation[i],
+												 datums1[i],
+												 datums2[i]));
+		if (cmpval != 0)
+			break;
+	}
+
+	/*
+	 * If the comparison is anything other than equal, we're done.  If they
+	 * compare equal though, we still have to consider whether the boundaries
+	 * are inclusive or exclusive.  Exclusive one (the upper bound) is
+	 * considered smaller of the two.
+	 */
+	if (cmpval == 0 && lower1 != lower2)
+		cmpval = lower1 ? 1 : -1;
+
+	return cmpval;
+}
+
+/*
+ * partition_rbound_datum_cmp
+ *
+ * Compare a range bound (given by rb_datums and rb_content) against the
+ * partition key of a tuple (tuple_datums).  The result is negative, zero, or
+ * positive as the bound is less than, equal to, or greater than the key.
+ */
+static int32
+partition_rbound_datum_cmp(PartitionKey key,
+						   Datum *rb_datums, RangeDatumContent *rb_content,
+						   Datum *tuple_datums)
+{
+	int32		result = -1;
+	int			col;
+
+	for (col = 0; col < key->partnatts; col++)
+	{
+		/* An infinite bound column decides the outcome on its own */
+		if (rb_content[col] == RANGE_DATUM_NEG_INF)
+			return -1;
+		if (rb_content[col] != RANGE_DATUM_FINITE)
+			return 1;
+
+		result = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[col],
+												 key->partcollation[col],
+												 rb_datums[col],
+												 tuple_datums[col]));
+
+		/* Later columns only matter while earlier ones are equal */
+		if (result != 0)
+			return result;
+	}
+
+	return result;
+}
+
+/*
+ * partition_bound_cmp
+ *
+ * Compare the bound at "offset" in boundinfo with *probe; the result is
+ * negative, zero, or positive as the bound is less than, equal to, or
+ * greater than the probe.
+ *
+ * For list partitioning *probe is read as a Datum.  For range partitioning
+ * it is a PartitionRangeBound if probe_is_bound is true, else a Datum array.
+ */
+static int32
+partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo,
+					int offset, void *probe, bool probe_is_bound)
+{
+	Datum	   *stored = boundinfo->datums[offset];
+	int32		result = -1;
+
+	if (key->strategy == PARTITION_STRATEGY_LIST)
+	{
+		result = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
+												 key->partcollation[0],
+												 stored[0],
+												 *(Datum *) probe));
+	}
+	else if (key->strategy == PARTITION_STRATEGY_RANGE)
+	{
+		RangeDatumContent *stored_content = boundinfo->content[offset];
+
+		if (!probe_is_bound)
+			result = partition_rbound_datum_cmp(key,
+												stored, stored_content,
+												(Datum *) probe);
+		else
+		{
+			/*
+			 * We need to pass whether the existing bound is a lower bound,
+			 * so that two equal-valued lower and upper bounds are not
+			 * regarded equal.
+			 */
+			bool		stored_is_lower = boundinfo->indexes[offset] < 0;
+
+			result = partition_rbound_cmp(key,
+										  stored, stored_content,
+										  stored_is_lower,
+										  (PartitionRangeBound *) probe);
+		}
+	}
+	else
+		elog(ERROR, "unexpected partition strategy: %d",
+			 (int) key->strategy);
+
+	return result;
+}
+
+/*
+ * Binary search on a collection of partition bounds. Returns greatest index
+ * of bound in array boundinfo->datums which is less or equal with *probe.
+ * If all bounds in the array are greater than *probe, -1 is returned.
+ *
+ * *probe could either be a partition bound or a Datum array representing
+ * the partition key of a tuple being routed; probe_is_bound tells which.
+ * We pass that down to the comparison function so that it can interpret the
+ * contents of *probe accordingly.
+ *
+ * *is_equal is set to whether the bound at the returned index is equal with
+ * *probe.  It is always written; in particular it is false when -1 is
+ * returned, so callers need not pre-initialize it.
+ */
+static int
+partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo,
+						void *probe, bool probe_is_bound, bool *is_equal)
+{
+	int			lo,
+				hi,
+				mid;
+
+	/* Nothing matched yet */
+	*is_equal = false;
+
+	lo = -1;
+	hi = boundinfo->ndatums - 1;
+	while (lo < hi)
+	{
+		int32		cmpval;
+
+		/* Round up so that the range shrinks even when hi == lo + 1 */
+		mid = (lo + hi + 1) / 2;
+		cmpval = partition_bound_cmp(key, boundinfo, mid, probe,
+									 probe_is_bound);
+		if (cmpval <= 0)
+		{
+			lo = mid;
+			*is_equal = (cmpval == 0);
+		}
+		else
+			hi = mid - 1;
+	}
+
+	return lo;
+}
*/
recordDependencyOnSingleRelExpr(&conobject, conExpr, relId,
DEPENDENCY_NORMAL,
- DEPENDENCY_NORMAL);
+ DEPENDENCY_NORMAL, false);
}
/* Post creation hook for new constraint */
* locked the relation.
*/
if (onerel->rd_rel->relkind == RELKIND_RELATION ||
- onerel->rd_rel->relkind == RELKIND_MATVIEW)
+ onerel->rd_rel->relkind == RELKIND_MATVIEW ||
+ onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
/* Regular table, so we'll use the regular row acquisition function */
acquirefunc = acquire_sample_rows;
/* Check table type (MATVIEW can't happen, but might as well allow) */
if (childrel->rd_rel->relkind == RELKIND_RELATION ||
- childrel->rd_rel->relkind == RELKIND_MATVIEW)
+ childrel->rd_rel->relkind == RELKIND_MATVIEW ||
+ childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
/* Regular table, so use the regular row acquisition function */
acquirefunc = acquire_sample_rows;
ExprState **defexprs; /* array of default att expressions */
bool volatile_defexprs; /* is any of defexprs volatile? */
List *range_table;
+ PartitionDispatch *partition_dispatch_info;
+ int num_dispatch;
+ int num_partitions;
+ ResultRelInfo *partitions;
+ TupleConversionMap **partition_tupconv_maps;
/*
* These variables are used to reduce overhead in textual COPY FROM.
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("table \"%s\" does not have OIDs",
RelationGetRelationName(cstate->rel))));
+
+ /*
+ * Initialize state for CopyFrom tuple routing. Watch out for
+ * any foreign partitions.
+ */
+ if (is_from && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ PartitionDispatch *pd;
+ List *leaf_parts;
+ ListCell *cell;
+ int i,
+ num_parted,
+ num_leaf_parts;
+ ResultRelInfo *leaf_part_rri;
+
+ /* Get the tuple-routing information and lock partitions */
+ pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
+ &num_parted, &leaf_parts);
+ num_leaf_parts = list_length(leaf_parts);
+ cstate->partition_dispatch_info = pd;
+ cstate->num_dispatch = num_parted;
+ cstate->num_partitions = num_leaf_parts;
+ cstate->partitions = (ResultRelInfo *) palloc(num_leaf_parts *
+ sizeof(ResultRelInfo));
+ cstate->partition_tupconv_maps = (TupleConversionMap **)
+ palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+
+ leaf_part_rri = cstate->partitions;
+ i = 0;
+ foreach(cell, leaf_parts)
+ {
+ Relation partrel;
+
+ /*
+ * We locked all the partitions above including the leaf
+ * partitions. Note that each of the relations in
+ * cstate->partitions will be closed by CopyFrom() after
+ * it's finished with its processing.
+ */
+ partrel = heap_open(lfirst_oid(cell), NoLock);
+
+ /*
+ * Verify result relation is a valid target for the current
+ * operation.
+ */
+ CheckValidResultRel(partrel, CMD_INSERT);
+
+ InitResultRelInfo(leaf_part_rri,
+ partrel,
+ 1, /* dummy */
+ false, /* no partition constraint check */
+ 0);
+
+ /* Open partition indices */
+ ExecOpenIndices(leaf_part_rri, false);
+
+ if (!equalTupleDescs(tupDesc, RelationGetDescr(partrel)))
+ cstate->partition_tupconv_maps[i] =
+ convert_tuples_by_name(tupDesc,
+ RelationGetDescr(partrel),
+ gettext_noop("could not convert row type"));
+ leaf_part_rri++;
+ i++;
+ }
+ }
}
else
{
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy from sequence \"%s\"",
RelationGetRelationName(rel))));
+ else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy from partitioned table \"%s\"",
+ RelationGetRelationName(rel)),
+ errhint("Try the COPY (SELECT ...) TO variant.")));
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
Datum *values;
bool *nulls;
ResultRelInfo *resultRelInfo;
+ ResultRelInfo *saved_resultRelInfo = NULL;
EState *estate = CreateExecutorState(); /* for ExecConstraints() */
ExprContext *econtext;
TupleTableSlot *myslot;
* only hint about them in the view case.)
*/
if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+ cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
!(cstate->rel->trigdesc &&
cstate->rel->trigdesc->trig_insert_instead_row))
{
InitResultRelInfo(resultRelInfo,
cstate->rel,
1, /* dummy rangetable index */
+ true, /* do load partition check expression */
0);
ExecOpenIndices(resultRelInfo, false);
* BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
* expressions. Such triggers or expressions might query the table we're
* inserting to, and act differently if the tuples that have already been
- * processed and prepared for insertion are not there.
+ * processed and prepared for insertion are not there. We also can't
+ * do it if the table is partitioned.
*/
if ((resultRelInfo->ri_TrigDesc != NULL &&
(resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
+ cstate->partition_dispatch_info != NULL ||
cstate->volatile_defexprs)
{
useHeapMultiInsert = false;
slot = myslot;
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
+ /* Determine the partition to heap_insert the tuple into */
+ if (cstate->partition_dispatch_info)
+ {
+ int leaf_part_index;
+ TupleConversionMap *map;
+
+ /*
+ * Away we go ... If we end up not finding a partition after all,
+ * ExecFindPartition() does not return and errors out instead.
+ * Otherwise, the returned value is to be used as an index into
+ * arrays cstate->partitions[] and cstate->partition_tupconv_maps[] that
+ * will get us the ResultRelInfo and TupleConversionMap for the
+ * partition, respectively.
+ */
+ leaf_part_index = ExecFindPartition(resultRelInfo,
+ cstate->partition_dispatch_info,
+ slot,
+ estate);
+ Assert(leaf_part_index >= 0 &&
+ leaf_part_index < cstate->num_partitions);
+
+ /*
+ * Save the old ResultRelInfo and switch to the one corresponding
+ * to the selected partition.
+ */
+ saved_resultRelInfo = resultRelInfo;
+ resultRelInfo = cstate->partitions + leaf_part_index;
+
+ /* We do not yet have a way to insert into a foreign partition */
+ if (resultRelInfo->ri_FdwRoutine)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot route inserted tuples to a foreign table")));
+
+ /*
+ * For ExecInsertIndexTuples() to work on the partition's indexes
+ */
+ estate->es_result_relation_info = resultRelInfo;
+
+ /*
+ * We might need to convert from the parent rowtype to the
+ * partition rowtype.
+ */
+ map = cstate->partition_tupconv_maps[leaf_part_index];
+ if (map)
+ {
+ tuple = do_convert_tuple(tuple, map);
+ ExecStoreTuple(tuple, slot, InvalidBuffer, true);
+ }
+
+ tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+ }
+
skip_tuple = false;
/* BEFORE ROW INSERT Triggers */
else
{
/* Check the constraints of the tuple */
- if (cstate->rel->rd_att->constr)
+ if (cstate->rel->rd_att->constr ||
+ resultRelInfo->ri_PartitionCheck)
ExecConstraints(resultRelInfo, slot, estate);
if (useHeapMultiInsert)
List *recheckIndexes = NIL;
/* OK, store the tuple and create index entries for it */
- heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
+ heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
+ hi_options, bistate);
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot,
* tuples inserted by an INSERT command.
*/
processed++;
+
+ if (saved_resultRelInfo)
+ {
+ resultRelInfo = saved_resultRelInfo;
+ estate->es_result_relation_info = resultRelInfo;
+ }
}
}
ExecCloseIndices(resultRelInfo);
+ /* Close all the partitioned tables, leaf partitions, and their indices */
+ if (cstate->partition_dispatch_info)
+ {
+ int i;
+
+ /*
+ * Remember cstate->partition_dispatch_info[0] corresponds to the root
+ * partitioned table, which we must not try to close, because it is
+ * the main target table of COPY that will be closed eventually by
+ * DoCopy().
+ */
+ for (i = 1; i < cstate->num_dispatch; i++)
+ {
+ PartitionDispatch pd = cstate->partition_dispatch_info[i];
+
+ heap_close(pd->reldesc, NoLock);
+ }
+ for (i = 0; i < cstate->num_partitions; i++)
+ {
+ ResultRelInfo *resultRelInfo = cstate->partitions + i;
+
+ ExecCloseIndices(resultRelInfo);
+ heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+ }
+ }
+
FreeExecutorState(estate);
/*
* Create the relation. (This will error out if there's an existing view,
* so we don't need more code to complain if "replace" is false.)
*/
- intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL);
+ intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
/*
* If necessary, create a TOAST table for the target table. Note that
char *accessMethodName, Oid accessMethodId,
bool amcanorder,
bool isconstraint);
-static Oid GetIndexOpClass(List *opclass, Oid attrType,
- char *accessMethodName, Oid accessMethodId);
static char *ChooseIndexName(const char *tabname, Oid namespaceId,
List *colnames, List *exclusionOpNames,
bool primary, bool isconstraint);
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot create index on foreign table \"%s\"",
RelationGetRelationName(rel))));
+ else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot create index on partitioned table \"%s\"",
+ RelationGetRelationName(rel))));
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
/*
* Identify the opclass to use.
*/
- classOidP[attn] = GetIndexOpClass(attribute->opclass,
- atttype,
- accessMethodName,
- accessMethodId);
+ classOidP[attn] = ResolveOpClass(attribute->opclass,
+ atttype,
+ accessMethodName,
+ accessMethodId);
/*
* Identify the exclusion operator, if any.
/*
* Resolve possibly-defaulted operator class specification
+ *
+ * Note: This is used to resolve operator class specification in index and
+ * partition key definitions.
*/
-static Oid
-GetIndexOpClass(List *opclass, Oid attrType,
- char *accessMethodName, Oid accessMethodId)
+Oid
+ResolveOpClass(List *opclass, Oid attrType,
+ char *accessMethodName, Oid accessMethodId)
{
char *schemaname;
char *opcname;
* check */
/* Currently, we only allow plain tables to be locked */
- if (relkind != RELKIND_RELATION)
+ if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
rv->relname)));
/* Relation type MUST be a table. */
- if (relkind != RELKIND_RELATION)
+ if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table", rv->relname)));
relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;
rel = heap_open(relid, AccessExclusiveLock);
- if (rel->rd_rel->relkind != RELKIND_RELATION)
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
relation->rd_rel->relkind != RELKIND_VIEW &&
relation->rd_rel->relkind != RELKIND_MATVIEW &&
relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
- relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table",
stmt->tablespacename = NULL;
stmt->if_not_exists = seq->if_not_exists;
- address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL);
+ address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
seqoid = address.objectId;
Assert(seqoid != InvalidOid);
/* Must be a regular or foreign table */
if (!(tablerel->rd_rel->relkind == RELKIND_RELATION ||
- tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
+ tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
+ tablerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("referenced relation \"%s\" is not a table or foreign table",
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_constraint.h"
#include "nodes/parsenodes.h"
#include "optimizer/clauses.h"
#include "optimizer/planner.h"
+#include "optimizer/predtest.h"
+#include "optimizer/prep.h"
+#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parse_collate.h"
Oid newTableSpace; /* new tablespace; 0 means no change */
bool chgPersistence; /* T if SET LOGGED/UNLOGGED is used */
char newrelpersistence; /* if above is true */
+ List *partition_constraint; /* for attach partition validation */
/* Objects to rebuild after completing ALTER TYPE operations */
List *changedConstraintOids; /* OIDs of constraints to rebuild */
List *changedConstraintDefs; /* string definitions of same */
gettext_noop("foreign table \"%s\" does not exist, skipping"),
gettext_noop("\"%s\" is not a foreign table"),
gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")},
+ {RELKIND_PARTITIONED_TABLE,
+ ERRCODE_UNDEFINED_TABLE,
+ gettext_noop("table \"%s\" does not exist"),
+ gettext_noop("table \"%s\" does not exist, skipping"),
+ gettext_noop("\"%s\" is not a table"),
+ gettext_noop("Use DROP TABLE to remove a table.")},
{'\0', 0, NULL, NULL, NULL, NULL}
};
static void truncate_check_rel(Relation rel);
static List *MergeAttributes(List *schema, List *supers, char relpersistence,
- List **supOids, List **supconstr, int *supOidCount);
+ bool is_partition, List **supOids, List **supconstr,
+ int *supOidCount);
static bool MergeCheckConstraint(List *constraints, char *name, Node *expr);
static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel);
static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel);
static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid);
static void ATPrepAddOids(List **wqueue, Relation rel, bool recurse,
AlterTableCmd *cmd, LOCKMODE lockmode);
+static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing);
static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode);
+static void ATPrepSetNotNull(Relation rel, bool recurse, bool recursing);
static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
const char *colName, LOCKMODE lockmode);
static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName,
Oid oldRelOid, void *arg);
static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
Oid oldrelid, void *arg);
+static bool is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr);
+static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy);
+static void ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs,
+ List **partexprs, Oid *partopclass, Oid *partcollation);
+static void CreateInheritance(Relation child_rel, Relation parent_rel);
+static void RemoveInheritance(Relation child_rel, Relation parent_rel);
+static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel,
+ PartitionCmd *cmd);
+static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name);
/* ----------------------------------------------------------------
*/
ObjectAddress
DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
- ObjectAddress *typaddress)
+ ObjectAddress *typaddress, const char *queryString)
{
char relname[NAMEDATALEN];
Oid namespaceId;
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("ON COMMIT can only be used on temporary tables")));
+ if (stmt->partspec != NULL)
+ {
+ if (relkind != RELKIND_RELATION)
+ elog(ERROR, "unexpected relkind: %d", (int) relkind);
+
+ relkind = RELKIND_PARTITIONED_TABLE;
+ }
+
/*
* Look up the namespace in which we are supposed to create the relation,
* check we have permission to create there, lock it against concurrent
*/
schema = MergeAttributes(schema, stmt->inhRelations,
stmt->relation->relpersistence,
+ stmt->partbound != NULL,
&inheritOids, &old_constraints, &parentOidCount);
/*
descriptor = BuildDescForRelation(schema);
/*
- * Notice that we allow OIDs here only for plain tables, even though some
- * other relkinds can support them. This is necessary because the
- * default_with_oids GUC must apply only to plain tables and not any other
- * relkind; doing otherwise would break existing pg_dump files. We could
- * allow explicit "WITH OIDS" while not allowing default_with_oids to
- * affect other relkinds, but it would complicate interpretOidsOption().
+ * Notice that we allow OIDs here only for plain tables and partitioned
+ * tables, even though some other relkinds can support them. This is
+ * necessary because the default_with_oids GUC must apply only to plain
+ * tables and not any other relkind; doing otherwise would break existing
+ * pg_dump files. We could allow explicit "WITH OIDS" while not allowing
+ * default_with_oids to affect other relkinds, but it would complicate
+ * interpretOidsOption().
*/
localHasOids = interpretOidsOption(stmt->options,
- (relkind == RELKIND_RELATION));
+ (relkind == RELKIND_RELATION ||
+ relkind == RELKIND_PARTITIONED_TABLE));
descriptor->tdhasoid = (localHasOids || parentOidCount > 0);
+ if (stmt->partbound)
+ {
+ /* If the parent has OIDs, partitions must have them too. */
+ if (parentOidCount > 0 && !localHasOids)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot create table without OIDs as partition of table with OIDs")));
+ /* If the parent doesn't, partitions must not have them. */
+ if (parentOidCount == 0 && localHasOids)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot create table with OIDs as partition of table without OIDs")));
+ }
+
/*
* Find columns with default values and prepare for insertion of the
* defaults. Pre-cooked (that is, inherited) defaults go into a list of
*/
rel = relation_open(relationId, AccessExclusiveLock);
+ /* Process and store partition bound, if any. */
+ if (stmt->partbound)
+ {
+ Node *bound;
+ ParseState *pstate;
+ Oid parentId = linitial_oid(inheritOids);
+ Relation parent;
+
+ /* Already have strong enough lock on the parent */
+ parent = heap_open(parentId, NoLock);
+
+ /*
+ * We are going to try to validate the partition bound specification
+ * against the partition key of parent, so it better have one.
+ */
+ if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("\"%s\" is not partitioned",
+ RelationGetRelationName(parent))));
+
+ /*
+ * Transform the bound values.  The query string is attached to the
+ * parse state so it is available as source text while transforming.
+ */
+ pstate = make_parsestate(NULL);
+ pstate->p_sourcetext = queryString;
+ bound = transformPartitionBound(pstate, parent, stmt->partbound);
+
+ /*
+ * Check first that the new partition's bound is valid and does not
+ * overlap with any of existing partitions of the parent - note that
+ * it does not return on error.
+ */
+ check_new_partition_bound(relname, parent, bound);
+ heap_close(parent, NoLock);
+
+ /* Update the pg_class entry. */
+ StorePartitionBound(rel, bound);
+
+ /*
+ * The code that follows may also update the pg_class tuple to update
+ * relnumchecks, so bump up the command counter to avoid the "already
+ * updated by self" error.
+ */
+ CommandCounterIncrement();
+ }
+
+
+	/*
+	 * Process the partitioning specification (if any) and store the
+	 * partition key information into the catalog.
+	 */
+	if (stmt->partspec)
+	{
+		char		strategy;
+		int			partnatts,
+					i;
+		AttrNumber	partattrs[PARTITION_MAX_KEYS];
+		Oid			partopclass[PARTITION_MAX_KEYS];
+		Oid			partcollation[PARTITION_MAX_KEYS];
+		List	   *partexprs = NIL;
+		List	   *cmds = NIL;
+
+		/*
+		 * We need to transform the raw parsetrees corresponding to partition
+		 * expressions into executable expression trees.  Like column defaults
+		 * and CHECK constraints, we could not have done the transformation
+		 * earlier.
+		 */
+		stmt->partspec = transformPartitionSpec(rel, stmt->partspec,
+												&strategy);
+		ComputePartitionAttrs(rel, stmt->partspec->partParams,
+							  partattrs, &partexprs, partopclass,
+							  partcollation);
+
+		partnatts = list_length(stmt->partspec->partParams);
+		StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs,
+						  partopclass, partcollation);
+
+		/* Force key columns to be NOT NULL when using range partitioning */
+		if (strategy == PARTITION_STRATEGY_RANGE)
+		{
+			for (i = 0; i < partnatts; i++)
+			{
+				AttrNumber	partattno = partattrs[i];
+				Form_pg_attribute attform;
+
+				/*
+				 * A key entry with attribute number 0 is an expression, not
+				 * a plain column; it has no pg_attribute entry to mark.
+				 * Skip it before indexing the descriptor, since looking at
+				 * attrs[partattno - 1] would read before the start of the
+				 * array.
+				 */
+				if (partattno == 0)
+					continue;
+
+				attform = descriptor->attrs[partattno - 1];
+				if (!attform->attnotnull)
+				{
+					/* Add a subcommand to make this one NOT NULL */
+					AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+					cmd->subtype = AT_SetNotNull;
+					cmd->name = pstrdup(NameStr(attform->attname));
+					cmds = lappend(cmds, cmd);
+				}
+			}
+
+			/*
+			 * Although there cannot be any partitions yet, we still need to
+			 * pass true for recurse; ATPrepSetNotNull() complains if we
+			 * don't.
+			 */
+			if (cmds != NIL)
+				AlterTableInternal(RelationGetRelid(rel), cmds, true);
+		}
+	}
+
/*
* Now add any newly specified column default values and CHECK constraints
* to the new relation. These are passed to us in the form of raw
HeapTuple tuple;
struct DropRelationCallbackState *state;
char relkind;
+ char expected_relkind;
Form_pg_class classform;
LOCKMODE heap_lockmode;
return; /* concurrently dropped, so nothing to do */
classform = (Form_pg_class) GETSTRUCT(tuple);
- if (classform->relkind != relkind)
+ /*
+ * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
+ * but RemoveRelations() can only pass one relkind for a given relation.
+ * It chooses RELKIND_RELATION for both regular and partitioned tables.
+ * That means we must be careful before giving the wrong type error when
+ * the relation is RELKIND_PARTITIONED_TABLE.
+ */
+ if (classform->relkind == RELKIND_PARTITIONED_TABLE)
+ expected_relkind = RELKIND_RELATION;
+ else
+ expected_relkind = classform->relkind;
+
+ if (relkind != expected_relkind)
DropErrorMsgWrongType(rel->relname, classform->relkind, relkind);
/* Allow DROP to either table owner or schema owner */
relids = lappend_oid(relids, childrelid);
}
}
+ else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("must truncate child tables too")));
}
/*
InitResultRelInfo(resultRelInfo,
rel,
0, /* dummy rangetable index */
+ false,
0);
resultRelInfo++;
}
AclResult aclresult;
/* Only allow truncate on regular tables */
- if (rel->rd_rel->relkind != RELKIND_RELATION)
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
* of ColumnDef's.) It is destructively changed.
* 'supers' is a list of names (as RangeVar nodes) of parent relations.
* 'relpersistence' is a persistence type of the table.
+ * 'is_partition' tells if the table is a partition
*
* Output arguments:
* 'supOids' receives a list of the OIDs of the parent relations.
*/
static List *
MergeAttributes(List *schema, List *supers, char relpersistence,
- List **supOids, List **supconstr, int *supOidCount)
+ bool is_partition, List **supOids, List **supconstr,
+ int *supOidCount)
{
ListCell *entry;
List *inhSchema = NIL;
bool have_bogus_defaults = false;
int child_attno;
static Node bogus_marker = {0}; /* marks conflicting defaults */
+ List *saved_schema = NIL;
/*
* Check for and reject tables with too many columns. We perform this
errmsg("tables can have at most %d columns",
MaxHeapAttributeNumber)));
+ /*
+ * In case of a partition, there are no new column definitions, only
+ * dummy ColumnDefs created for column constraints. We merge these
+ * constraints inherited from the parent.
+ */
+ if (is_partition)
+ {
+ saved_schema = schema;
+ schema = NIL;
+ }
+
/*
* Check for duplicate names in the explicit list of attributes.
*
* on the parent table, which might otherwise be attempting to clear
* the parent's relhassubclass field, if its previous children were
* recently dropped.
+ *
+ * If the child table is a partition, then we instead grab an exclusive
+ * lock on the parent because its partition descriptor will be changed
+ * by addition of the new partition.
+ */
+ if (!is_partition)
+ relation = heap_openrv(parent, ShareUpdateExclusiveLock);
+ else
+ relation = heap_openrv(parent, AccessExclusiveLock);
+
+ /*
+ * We do not allow partitioned tables and partitions to participate
+ * in regular inheritance.
*/
- relation = heap_openrv(parent, ShareUpdateExclusiveLock);
+ if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ !is_partition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from partitioned table \"%s\"",
+ parent->relname)));
+ if (relation->rd_rel->relispartition && !is_partition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from partition \"%s\"",
+ parent->relname)));
if (relation->rd_rel->relkind != RELKIND_RELATION &&
- relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("inherited relation \"%s\" is not a table or foreign table",
relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot inherit from temporary relation \"%s\"",
+ errmsg(!is_partition
+ ? "cannot inherit from temporary relation \"%s\""
+ : "cannot create a permanent relation as partition of temporary relation \"%s\"",
parent->relname)));
/* If existing rel is temp, it must belong to this session */
!relation->rd_islocaltemp)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot inherit from temporary relation of another session")));
+ errmsg(!is_partition
+ ? "cannot inherit from temporary relation of another session"
+ : "cannot create as partition of temporary relation of another session")));
/*
* We should have an UNDER permission flag for this, but for now,
pfree(newattno);
/*
- * Close the parent rel, but keep our ShareUpdateExclusiveLock on it
- * until xact commit. That will prevent someone else from deleting or
- * ALTERing the parent before the child is committed.
+ * Close the parent rel, but keep our lock on it until xact commit.
+ * That will prevent someone else from deleting or ALTERing the parent
+ * before the child is committed.
*/
heap_close(relation, NoLock);
}
/*
* If we had no inherited attributes, the result schema is just the
* explicitly declared columns. Otherwise, we need to merge the declared
- * columns into the inherited schema list.
+ * columns into the inherited schema list.  Note, however, that a partition
+ * never has any explicitly declared columns.
*/
if (inhSchema != NIL)
{
Oid defcollid,
newcollid;
+ /*
+ * Partitions have only one parent, so conflict should never
+ * occur
+ */
+ Assert(!is_partition);
+
/*
* Yes, try to merge the two column definitions. They must
* have the same type, typmod, and collation.
MaxHeapAttributeNumber)));
}
+	/*
+	 * Now that we have the column definition list for a partition, we can
+	 * check whether the columns referenced in column option specifications
+	 * actually exist.  Also, we merge the options into the corresponding
+	 * column definitions.
+	 */
+	if (is_partition && list_length(saved_schema) > 0)
+	{
+		schema = list_concat(schema, saved_schema);
+
+		foreach(entry, schema)
+		{
+			ColumnDef  *coldef = lfirst(entry);
+			ListCell   *rest = lnext(entry);
+			ListCell   *prev = entry;
+
+			/*
+			 * Partition column option that does not belong to a column from
+			 * the parent.  This works because the columns from the parent
+			 * come first in the list (see above).
+			 */
+			if (coldef->typeName == NULL)
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" does not exist",
+								coldef->colname)));
+
+			while (rest != NULL)
+			{
+				ColumnDef  *restdef = lfirst(rest);
+				ListCell   *next = lnext(rest); /* need to save it in case
+												 * we delete it */
+
+				if (strcmp(coldef->colname, restdef->colname) == 0)
+				{
+					/*
+					 * merge the column options into the column from the
+					 * parent
+					 */
+					coldef->is_not_null = restdef->is_not_null;
+					coldef->raw_default = restdef->raw_default;
+					coldef->cooked_default = restdef->cooked_default;
+					coldef->constraints = restdef->constraints;
+
+					/*
+					 * Unlink the now-merged entry.  "rest" always follows
+					 * "entry", so it is never the list head.  "prev" must
+					 * keep pointing at the last surviving cell: advancing
+					 * it to the deleted cell would hand a dangling pointer
+					 * to the next list_delete_cell() call.
+					 */
+					list_delete_cell(schema, rest, prev);
+				}
+				else
+					prev = rest;
+
+				rest = next;
+			}
+		}
+	}
+
+
/*
* If we found any conflicting parent default values, check to make sure
* they were overridden by the child.
relkind != RELKIND_MATVIEW &&
relkind != RELKIND_COMPOSITE_TYPE &&
relkind != RELKIND_INDEX &&
- relkind != RELKIND_FOREIGN_TABLE)
+ relkind != RELKIND_FOREIGN_TABLE &&
+ relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, view, materialized view, composite type, index, or foreign table",
cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def);
break;
+ case AT_AttachPartition:
+ case AT_DetachPartition:
+ cmd_lockmode = AccessExclusiveLock;
+ break;
+
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
break;
case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */
ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ ATPrepDropNotNull(rel, recurse, recursing);
ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode);
/* No command-specific prep needed */
pass = AT_PASS_DROP;
break;
case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */
ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ ATPrepSetNotNull(rel, recurse, recursing);
ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode);
/* No command-specific prep needed */
pass = AT_PASS_ADD_CONSTR;
/* No command-specific prep needed */
pass = AT_PASS_MISC;
break;
+ case AT_AttachPartition:
+ case AT_DetachPartition:
+ ATSimplePermissions(rel, ATT_TABLE);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
{
AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
- if (tab->relkind == RELKIND_RELATION ||
+ /*
+ * If the table is source table of ATTACH PARTITION command, we did
+ * not modify anything about it that will change its toasting
+ * requirement, so no need to check.
+ */
+ if (((tab->relkind == RELKIND_RELATION ||
+ tab->relkind == RELKIND_PARTITIONED_TABLE) &&
+ tab->partition_constraint == NIL) ||
tab->relkind == RELKIND_MATVIEW)
AlterTableCreateToastTable(tab->relid, (Datum) 0, lockmode);
}
case AT_GenericOptions:
ATExecGenericOptions(rel, (List *) cmd->def);
break;
+ case AT_AttachPartition:
+ ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def);
+ break;
+ case AT_DetachPartition:
+ ATExecDetachPartition(rel, ((PartitionCmd *) cmd->def)->name);
+ break;
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
* Test the current data within the table against new constraints
* generated by ALTER TABLE commands, but don't rebuild data.
*/
- if (tab->constraints != NIL || tab->new_notnull)
+ if (tab->constraints != NIL || tab->new_notnull ||
+ tab->partition_constraint != NIL)
ATRewriteTable(tab, InvalidOid, lockmode);
/*
CommandId mycid;
BulkInsertState bistate;
int hi_options;
+ List *partqualstate = NIL;
/*
* Open the relation(s). We have surely already locked the existing
}
}
+ /* Build expression execution states for partition check quals */
+ if (tab->partition_constraint)
+ {
+ needscan = true;
+ partqualstate = (List *)
+ ExecPrepareExpr((Expr *) tab->partition_constraint,
+ estate);
+ }
+
foreach(l, tab->newvals)
{
NewColumnValue *ex = lfirst(l);
}
}
+ if (partqualstate && !ExecQual(partqualstate, econtext, true))
+ ereport(ERROR,
+ (errcode(ERRCODE_CHECK_VIOLATION),
+ errmsg("partition constraint is violated by some row")));
+
/* Write the tuple out to the new relation */
if (newrel)
heap_insert(newrel, tuple, mycid, hi_options, bistate);
switch (rel->rd_rel->relkind)
{
case RELKIND_RELATION:
+ case RELKIND_PARTITIONED_TABLE:
actual_target = ATT_TABLE;
break;
case RELKIND_VIEW:
*/
if (recurse &&
(rel->rd_rel->relkind == RELKIND_RELATION ||
- rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
+ rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
{
Oid relid = RelationGetRelid(rel);
ListCell *child;
att = rel->rd_att->attrs[pg_depend->objsubid - 1];
if (rel->rd_rel->relkind == RELKIND_RELATION ||
- rel->rd_rel->relkind == RELKIND_MATVIEW)
+ rel->rd_rel->relkind == RELKIND_MATVIEW ||
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
if (origTypeName)
ereport(ERROR,
if (recursing)
ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ if (rel->rd_rel->relispartition && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot add column to a partition")));
+
attrdesc = heap_open(AttributeRelationId, RowExclusiveLock);
/*
* Return the address of the modified column. If the column was already
* nullable, InvalidObjectAddress is returned.
*/
+
+/*
+ * ATPrepDropNotNull
+ *		Preparation-phase check for ALTER COLUMN DROP NOT NULL.
+ *
+ * For a partitioned table the NOT NULL constraint, like a check constraint,
+ * has to be dropped from the child tables as well, so a request that neither
+ * recurses nor is itself part of a recursion is rejected.
+ */
+static void
+ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
+{
+	/* Nothing to enforce unless the target is a partitioned table */
+	if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		return;
+
+	/* Recursing (or being recursed into) covers the children */
+	if (recurse || recursing)
+		return;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+			 errmsg("constraint must be dropped from child tables too")));
+}
static ObjectAddress
ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
{
list_free(indexoidlist);
+ /* If rel is partition, shouldn't drop NOT NULL if parent has the same */
+ if (rel->rd_rel->relispartition)
+ {
+ Oid parentId = get_partition_parent(RelationGetRelid(rel));
+ Relation parent = heap_open(parentId, AccessShareLock);
+ TupleDesc tupDesc = RelationGetDescr(parent);
+ AttrNumber parent_attnum;
+
+ parent_attnum = get_attnum(parentId, colName);
+ if (tupDesc->attrs[parent_attnum - 1]->attnotnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("column \"%s\" is marked NOT NULL in parent table",
+ colName)));
+ heap_close(parent, AccessShareLock);
+ }
+
+	/*
+	 * If the table is a range partitioned table, check that the column
+	 * is not in the partition key.  Only range partitioning forces its key
+	 * columns to be NOT NULL (see DefineRelation), so key columns of tables
+	 * partitioned by any other strategy may have the constraint dropped.
+	 */
+	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+	{
+		PartitionKey key = RelationGetPartitionKey(rel);
+
+		if (get_partition_strategy(key) == PARTITION_STRATEGY_RANGE)
+		{
+			int			partnatts = get_partition_natts(key),
+						i;
+
+			for (i = 0; i < partnatts; i++)
+			{
+				AttrNumber	partattnum = get_partition_col_attnum(key, i);
+
+				if (partattnum == attnum)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+							 errmsg("column \"%s\" is in range partition key",
+									colName)));
+			}
+		}
+	}
+
+
/*
* Okay, actually perform the catalog change ... if needed
*/
* Return the address of the modified column. If the column was already NOT
* NULL, InvalidObjectAddress is returned.
*/
+
+/*
+ * ATPrepSetNotNull
+ *		Preparation-phase check for ALTER COLUMN SET NOT NULL.
+ *
+ * For a partitioned table the NOT NULL constraint, like a check constraint,
+ * has to be added to the child tables as well, so a request that neither
+ * recurses nor is itself part of a recursion is rejected.
+ */
+static void
+ATPrepSetNotNull(Relation rel, bool recurse, bool recursing)
+{
+	/* Nothing to enforce unless the target is a partitioned table */
+	if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		return;
+
+	/* Recursing (or being recursed into) covers the children */
+	if (recurse || recursing)
+		return;
+
+	ereport(ERROR,
+			(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+			 errmsg("constraint must be added to child tables too")));
+}
+
static ObjectAddress
ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
const char *colName, LOCKMODE lockmode)
if (rel->rd_rel->relkind != RELKIND_RELATION &&
rel->rd_rel->relkind != RELKIND_MATVIEW &&
rel->rd_rel->relkind != RELKIND_INDEX &&
- rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),