Logical replication
authorPeter Eisentraut <peter_e@gmx.net>
Thu, 19 Jan 2017 17:00:00 +0000 (12:00 -0500)
committerPeter Eisentraut <peter_e@gmx.net>
Fri, 20 Jan 2017 14:04:49 +0000 (09:04 -0500)
- Add PUBLICATION catalogs and DDL
- Add SUBSCRIPTION catalog and DDL
- Define logical replication protocol and output plugin
- Add logical replication workers

From: Petr Jelinek <petr@2ndquadrant.com>
Reviewed-by: Steve Singer <steve@ssinger.info>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Erik Rijkers <er@xs4all.nl>
Reviewed-by: Peter Eisentraut <peter.eisentraut@2ndquadrant.com>
119 files changed:
doc/src/sgml/catalogs.sgml
doc/src/sgml/config.sgml
doc/src/sgml/filelist.sgml
doc/src/sgml/func.sgml
doc/src/sgml/logical-replication.sgml [new file with mode: 0644]
doc/src/sgml/monitoring.sgml
doc/src/sgml/postgres.sgml
doc/src/sgml/protocol.sgml
doc/src/sgml/ref/allfiles.sgml
doc/src/sgml/ref/alter_publication.sgml [new file with mode: 0644]
doc/src/sgml/ref/alter_subscription.sgml [new file with mode: 0644]
doc/src/sgml/ref/create_publication.sgml [new file with mode: 0644]
doc/src/sgml/ref/create_subscription.sgml [new file with mode: 0644]
doc/src/sgml/ref/drop_publication.sgml [new file with mode: 0644]
doc/src/sgml/ref/drop_subscription.sgml [new file with mode: 0644]
doc/src/sgml/ref/pg_dump.sgml
doc/src/sgml/ref/psql-ref.sgml
doc/src/sgml/reference.sgml
src/Makefile
src/backend/access/transam/xact.c
src/backend/catalog/Makefile
src/backend/catalog/aclchk.c
src/backend/catalog/catalog.c
src/backend/catalog/dependency.c
src/backend/catalog/objectaddress.c
src/backend/catalog/pg_publication.c [new file with mode: 0644]
src/backend/catalog/pg_shdepend.c
src/backend/catalog/pg_subscription.c [new file with mode: 0644]
src/backend/catalog/system_views.sql
src/backend/commands/Makefile
src/backend/commands/alter.c
src/backend/commands/dbcommands.c
src/backend/commands/define.c
src/backend/commands/dropcmds.c
src/backend/commands/event_trigger.c
src/backend/commands/publicationcmds.c [new file with mode: 0644]
src/backend/commands/subscriptioncmds.c [new file with mode: 0644]
src/backend/commands/tablecmds.c
src/backend/executor/Makefile
src/backend/executor/execMain.c
src/backend/executor/execReplication.c [new file with mode: 0644]
src/backend/nodes/copyfuncs.c
src/backend/nodes/equalfuncs.c
src/backend/parser/gram.y
src/backend/postmaster/bgworker.c
src/backend/postmaster/pgstat.c
src/backend/postmaster/postmaster.c
src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
src/backend/replication/logical/Makefile
src/backend/replication/logical/launcher.c [new file with mode: 0644]
src/backend/replication/logical/proto.c [new file with mode: 0644]
src/backend/replication/logical/relation.c [new file with mode: 0644]
src/backend/replication/logical/worker.c [new file with mode: 0644]
src/backend/replication/pgoutput/Makefile [new file with mode: 0644]
src/backend/replication/pgoutput/pgoutput.c [new file with mode: 0644]
src/backend/replication/walreceiver.c
src/backend/storage/ipc/ipci.c
src/backend/storage/lmgr/lwlocknames.txt
src/backend/tcop/utility.c
src/backend/utils/cache/inval.c
src/backend/utils/cache/relcache.c
src/backend/utils/cache/syscache.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/bin/pg_dump/common.c
src/bin/pg_dump/pg_backup.h
src/bin/pg_dump/pg_backup_archiver.c
src/bin/pg_dump/pg_dump.c
src/bin/pg_dump/pg_dump.h
src/bin/pg_dump/pg_dump_sort.c
src/bin/pg_dump/pg_restore.c
src/bin/pg_dump/t/002_pg_dump.pl
src/bin/psql/command.c
src/bin/psql/describe.c
src/bin/psql/describe.h
src/bin/psql/help.c
src/bin/psql/tab-complete.c
src/include/catalog/dependency.h
src/include/catalog/indexing.h
src/include/catalog/pg_proc.h
src/include/catalog/pg_publication.h [new file with mode: 0644]
src/include/catalog/pg_publication_rel.h [new file with mode: 0644]
src/include/catalog/pg_subscription.h [new file with mode: 0644]
src/include/commands/defrem.h
src/include/commands/publicationcmds.h [new file with mode: 0644]
src/include/commands/subscriptioncmds.h [new file with mode: 0644]
src/include/executor/executor.h
src/include/nodes/nodes.h
src/include/nodes/parsenodes.h
src/include/parser/kwlist.h
src/include/pgstat.h
src/include/replication/logicallauncher.h [new file with mode: 0644]
src/include/replication/logicalproto.h [new file with mode: 0644]
src/include/replication/logicalrelation.h [new file with mode: 0644]
src/include/replication/logicalworker.h [new file with mode: 0644]
src/include/replication/pgoutput.h [new file with mode: 0644]
src/include/replication/walreceiver.h
src/include/replication/worker_internal.h [new file with mode: 0644]
src/include/storage/sinval.h
src/include/utils/acl.h
src/include/utils/inval.h
src/include/utils/rel.h
src/include/utils/relcache.h
src/include/utils/syscache.h
src/test/Makefile
src/test/perl/PostgresNode.pm
src/test/regress/expected/publication.out [new file with mode: 0644]
src/test/regress/expected/rules.out
src/test/regress/expected/sanity_check.out
src/test/regress/expected/subscription.out [new file with mode: 0644]
src/test/regress/parallel_schedule
src/test/regress/serial_schedule
src/test/regress/sql/publication.sql [new file with mode: 0644]
src/test/regress/sql/subscription.sql [new file with mode: 0644]
src/test/subscription/.gitignore [new file with mode: 0644]
src/test/subscription/Makefile [new file with mode: 0644]
src/test/subscription/README [new file with mode: 0644]
src/test/subscription/t/001_rep_changes.pl [new file with mode: 0644]
src/test/subscription/t/002_types.pl [new file with mode: 0644]

index 493050618df25c7037bdf0c48769fd0d3d75b78f..7c758a5081a2a5662032e0edd4348ea816e36fed 100644 (file)
       <entry>functions and procedures</entry>
      </row>
 
+     <row>
+      <entry><link linkend="catalog-pg-publication"><structname>pg_publication</structname></link></entry>
+      <entry>publications for logical replication</entry>
+     </row>
+
+     <row>
+      <entry><link linkend="catalog-pg-publication-rel"><structname>pg_publication_rel</structname></link></entry>
+      <entry>relation to publication mapping</entry>
+     </row>
+
      <row>
       <entry><link linkend="catalog-pg-range"><structname>pg_range</structname></link></entry>
       <entry>information about range types</entry>
       <entry>planner statistics</entry>
      </row>
 
+     <row>
+      <entry><link linkend="catalog-pg-subscription"><structname>pg_subscription</structname></link></entry>
+      <entry>logical replication subscriptions</entry>
+     </row>
+
      <row>
       <entry><link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link></entry>
       <entry>tablespaces within this database cluster</entry>
 
  </sect1>
 
+ <sect1 id="catalog-pg-publication">
+  <title><structname>pg_publication</structname></title>
+
+  <indexterm zone="catalog-pg-publication">
+   <primary>pg_publication</primary>
+  </indexterm>
+
+  <para>
+   The catalog <structname>pg_publication</structname> contains all
+   publications created in the database.  For more on publications see
+   <xref linkend="logical-replication-publication">.
+  </para>
+
+  <table>
+   <title><structname>pg_publication</structname> Columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry><structfield>oid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry></entry>
+      <entry>Row identifier (hidden attribute; must be explicitly selected)</entry>
+     </row>
+
+     <row>
+      <entry><structfield>pubname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry></entry>
+      <entry>Name of the publication</entry>
+     </row>
+
+     <row>
+      <entry><structfield>pubowner</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
+      <entry>Owner of the publication</entry>
+     </row>
+
+     <row>
+      <entry><structfield>puballtables</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>If true, this publication automatically includes all tables
+       in the database, including any that will be created in the future.
+      </entry>
+     </row>
+
+     <row>
+      <entry><structfield>pubinsert</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>If true, <command>INSERT</command> operations are replicated for
+       tables in the publication.</entry>
+     </row>
+
+     <row>
+      <entry><structfield>pubupdate</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>If true, <command>UPDATE</command> operations are replicated for
+       tables in the publication.</entry>
+     </row>
+
+     <row>
+      <entry><structfield>pubdelete</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>If true, <command>DELETE</command> operations are replicated for
+       tables in the publication.</entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+ </sect1>
+
+ <sect1 id="catalog-pg-publication-rel">
+  <title><structname>pg_publication_rel</structname></title>
+
+  <indexterm zone="catalog-pg-publication-rel">
+   <primary>pg_publication_rel</primary>
+  </indexterm>
+
+  <para>
+   The catalog <structname>pg_publication_rel</structname> contains the
+   mapping between relations and publications in the database.  This is a
+   many-to-many mapping.  See also <xref linkend="view-pg-publication-tables">
+   for a more user-friendly view of this information.
+  </para>
+
+  <table>
+   <title><structname>pg_publication_rel</structname> Columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry><structfield>prpubid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-publication"><structname>pg_publication</structname></link>.oid</literal></entry>
+      <entry>Reference to publication</entry>
+     </row>
+
+     <row>
+      <entry><structfield>prrelid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.oid</literal></entry>
+      <entry>Reference to relation</entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+ </sect1>
+
  <sect1 id="catalog-pg-range">
   <title><structname>pg_range</structname></title>
 
 
  </sect1>
 
+ <sect1 id="catalog-pg-subscription">
+  <title><structname>pg_subscription</structname></title>
+
+  <indexterm zone="catalog-pg-subscription">
+   <primary>pg_subscription</primary>
+  </indexterm>
+
+  <para>
+   The catalog <structname>pg_subscription</structname> contains all existing
+   logical replication subscriptions.  For more information about logical
+   replication see <xref linkend="logical-replication">.
+  </para>
+
+  <para>
+   Unlike most system catalogs, <structname>pg_subscription</structname> is
+   shared across all databases of a cluster: There is only one copy
+   of <structname>pg_subscription</structname> per cluster, not one per
+   database.
+  </para>
+
+  <para>
+   Access to this catalog is restricted from normal users.  Normal users can
+   use the view <xref linkend="pg-stat-subscription"> to get some information
+   about subscriptions.
+  </para>
+
+  <table>
+   <title><structname>pg_subscription</structname> Columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry><structfield>oid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry></entry>
+      <entry>Row identifier (hidden attribute; must be explicitly selected)</entry>
+     </row>
+
+     <row>
+      <entry><structfield>subdbid</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-database"><structname>pg_database</structname></link>.oid</literal></entry>
+      <entry>OID of the database which the subscription resides in</entry>
+     </row>
+
+     <row>
+      <entry><structfield>subname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry></entry>
+      <entry>Name of the subscription</entry>
+     </row>
+
+     <row>
+      <entry><structfield>subowner</structfield></entry>
+      <entry><type>oid</type></entry>
+      <entry><literal><link linkend="catalog-pg-authid"><structname>pg_authid</structname></link>.oid</literal></entry>
+      <entry>Owner of the subscription</entry>
+     </row>
+
+     <row>
+      <entry><structfield>subenabled</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>If true, the subscription is enabled and should be replicating.</entry>
+     </row>
+
+     <row>
+      <entry><structfield>subconninfo</structfield></entry>
+      <entry><type>text</type></entry>
+      <entry></entry>
+      <entry>Connection string to the upstream database</entry>
+     </row>
+
+     <row>
+      <entry><structfield>subslotname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry></entry>
+      <entry>Name of the replication slot in the upstream database. Also used
+       for the local replication origin name.</entry>
+     </row>
+
+     <row>
+      <entry><structfield>subpublications</structfield></entry>
+      <entry><type>text[]</type></entry>
+      <entry></entry>
+      <entry>Array of subscribed publication names. These reference the
+       publications on the publisher server. For more on publications
+       see <xref linkend="logical-replication-publication">.
+      </entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+ </sect1>
 
  <sect1 id="catalog-pg-tablespace">
   <title><structname>pg_tablespace</structname></title>
       <entry>prepared transactions</entry>
      </row>
 
+     <row>
+      <entry><link linkend="view-pg-publication-tables"><structname>pg_publication_tables</structname></link></entry>
+      <entry>publications and their associated tables</entry>
+     </row>
+
      <row>
       <entry><link linkend="view-pg-replication-origin-status"><structname>pg_replication_origin_status</structname></link></entry>
       <entry>information about replication origins, including replication progress</entry>
@@ -8871,6 +9125,61 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
 
  </sect1>
 
+ <sect1 id="view-pg-publication-tables">
+  <title><structname>pg_publication_tables</structname></title>
+
+  <indexterm zone="view-pg-publication-tables">
+   <primary>pg_publication_tables</primary>
+  </indexterm>
+
+  <para>
+   The view <structname>pg_publication_tables</structname> provides
+   information about the mapping between publications and the tables they
+   contain.  Unlike the underlying
+   catalog <structname>pg_publication_rel</structname>, this view expands
+   publications defined as <literal>FOR ALL TABLES</literal>, so for such
+   publications there will be a row for each eligible table.
+  </para>
+
+  <table>
+   <title><structname>pg_publication_tables</structname> Columns</title>
+
+   <tgroup cols="4">
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry><structfield>pubname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry><literal><link linkend="catalog-pg-publication"><structname>pg_publication</structname></link>.pubname</literal></entry>
+      <entry>Name of publication</entry>
+     </row>
+
+     <row>
+      <entry><structfield>schemaname</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry><literal><link linkend="catalog-pg-namespace"><structname>pg_namespace</structname></link>.nspname</literal></entry>
+      <entry>Name of schema containing table</entry>
+     </row>
+
+     <row>
+      <entry><structfield>tablename</structfield></entry>
+      <entry><type>name</type></entry>
+      <entry><literal><link linkend="catalog-pg-class"><structname>pg_class</structname></link>.relname</literal></entry>
+      <entry>Name of table</entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+ </sect1>
+
   <sect1 id="view-pg-replication-origin-status">
   <title><structname>pg_replication_origin_status</structname></title>
 
index 07afa3c77a78ef740646b9aa7c7c3fe4a6f1b508..fb5d6473efee9cb8f916cae32b917fb7872be7ae 100644 (file)
@@ -3411,6 +3411,47 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
 
      </variablelist>
     </sect2>
+
+    <sect2 id="runtime-config-replication-subscriber">
+     <title>Subscribers</title>
+
+     <para>
+      These settings control the behavior of a logical replication subscriber.
+      Their values on the publisher are irrelevant.
+     </para>
+
+     <para>
+      Note that <varname>wal_receiver_timeout</varname> and
+      <varname>wal_retrieve_retry_interval</varname> configuration parameters
+      affect the logical replication workers as well.
+     </para>
+
+     <variablelist>
+
+     <varlistentry id="guc-max-logical-replication-workers" xreflabel="max_logical_replication_workers">
+      <term><varname>max_logical_replication_workers</varname> (<type>int</type>)
+      <indexterm>
+       <primary><varname>max_logical_replication_workers</> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the maximum number of logical replication workers. This includes
+        both apply workers and table synchronization workers.
+       </para>
+       <para>
+        Logical replication workers are taken from the pool defined by
+        <varname>max_worker_processes</varname>.
+       </para>
+       <para>
+        The default value is 4.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     </variablelist>
+    </sect2>
+
    </sect1>
 
    <sect1 id="runtime-config-query">
index 69649a7da4bbcf87aff2cfc4de44394dbf68460d..2624c627dcb3d36228cc40d2073ca55f7c8a9cf9 100644 (file)
@@ -50,6 +50,7 @@
 <!ENTITY config        SYSTEM "config.sgml">
 <!ENTITY user-manag    SYSTEM "user-manag.sgml">
 <!ENTITY wal           SYSTEM "wal.sgml">
+<!ENTITY logical-replication    SYSTEM "logical-replication.sgml">
 
 <!-- programmer's guide -->
 <!ENTITY bgworker   SYSTEM "bgworker.sgml">
index 2504a466e6d01712cf83cdb2079b56277934c271..b214218791e435194f6666f68e46f26a799e196c 100644 (file)
@@ -18762,7 +18762,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
       </row>
 
       <row>
-       <entry>
+       <entry id="pg-replication-origin-advance">
         <indexterm>
          <primary>pg_replication_origin_advance</primary>
         </indexterm>
diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml
new file mode 100644 (file)
index 0000000..9312c0c
--- /dev/null
@@ -0,0 +1,396 @@
+<!-- doc/src/sgml/logical-replication.sgml -->
+
+<chapter id="logical-replication">
+ <title>Logical Replication</title>
+
+ <para>
+  Logical replication is a method of replicating data objects and their
+  changes, based upon their replication identity (usually a primary key).  We
+  use the term logical in contrast to physical replication, which uses exact
+  block addresses and byte-by-byte replication.  PostgreSQL supports both
+  mechanisms concurrently, see <xref linkend="high-availability">.  Logical
+  replication allows fine-grained control over both data replication and
+  security.
+ </para>
+
+ <para>
+  Logical replication uses a <firstterm>publish</firstterm>
+  and <firstterm>subscribe</firstterm> model with one or
+  more <firstterm>subscribers</firstterm> subscribing to one or more
+  <firstterm>publications</firstterm> on a <firstterm>publisher</firstterm>
+  node.  Subscribers pull data from the publications they subscribe to and may
+  subsequently re-publish data to allow cascading replication or more complex
+  configurations.
+ </para>
+
+ <para>
+  Logical replication sends the changes on the publisher to the subscriber as
+  they occur in real-time.  The subscriber applies the data in the same order
+  as the publisher so that transactional consistency is guaranteed for
+  publications within a single subscription.  This method of data replication
+  is sometimes referred to as transactional replication.
+ </para>
+
+ <para>
+  The typical use-cases for logical replication are:
+
+  <itemizedlist>
+   <listitem>
+    <para>
+     Sending incremental changes in a single database or a subset of a
+     database to subscribers as they occur.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Firing triggers for individual changes as they arrive on the
+     subscriber.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Consolidating multiple databases into a single one (for example for
+     analytical purposes).
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Replicating between different major versions of PostgreSQL.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Giving access to replicated data to different groups of users.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Sharing a subset of the database between multiple databases.
+    </para>
+   </listitem>
+  </itemizedlist>
+ </para>
+
+ <para>
+  The subscriber database behaves in the same way as any other PostgreSQL
+  instance and can be used as a publisher for other databases by defining its
+  own publications.  When the subscriber is treated as read-only by the
+  application, there will be no conflicts from a single subscription.  On the
+  other hand, if there are other writes done either by an application or by
+  other subscribers to the same set of tables, conflicts can arise.
+ </para>
+
+ <sect1 id="logical-replication-publication">
+  <title>Publication</title>
+
+  <para>
+   A <firstterm>publication</firstterm> object can be defined on any physical
+   replication master.  The node where a publication is defined is referred to
+   as <firstterm>publisher</firstterm>.  A publication is a set of changes
+   generated from a group of tables, and might also be described as a change
+   set or replication set.  Each publication exists in only one database.
+  </para>
+
+  <para>
+   Publications are different from schemas and do not affect how the table is
+   accessed.  Each table can be added to multiple publications if needed.
+   Publications may currently only contain tables.  Objects must be added
+   explicitly, except when a publication is created for <literal>ALL
+   TABLES</literal>.
+  </para>
+
+  <para>
+   Publications can choose to limit the changes they produce to show
+   any combination of <command>INSERT</command>, <command>UPDATE</command>, and
+   <command>DELETE</command> in a similar way to the way triggers are fired by
+   particular event types.  If a table without a <literal>REPLICA
+   IDENTITY</literal> is added to a publication that
+   replicates <command>UPDATE</command> or <command>DELETE</command>
+   operations then subsequent <command>UPDATE</command>
+   or <command>DELETE</command> operations will fail on the publisher.
+  </para>
+
+  <para>
+   Every publication can have multiple subscribers.
+  </para>
+
+  <para>
+   A publication is created using the <xref linkend="sql-createpublication">
+   command and may be later altered or dropped using corresponding commands.
+  </para>
+
+  <para>
+   The individual tables can be added and removed dynamically using
+   <xref linkend="sql-alterpublication">.  Both the <literal>ADD
+   TABLE</literal> and <literal>DROP TABLE</literal> operations are
+   transactional; so the table will start or stop replicating at the correct
+   snapshot once the transaction has committed.
+  </para>
+ </sect1>
+
+ <sect1 id="logical-replication-subscription">
+  <title>Subscription</title>
+
+  <para>
+   A <firstterm>subscription</firstterm> is the downstream side of logical
+   replication.  The node where a subscription is defined is referred to as
+   the <firstterm>subscriber</firstterm>.  A subscription defines the
+   connection to another database and the set of publications (one or more)
+   to which it wants to subscribe.
+  </para>
+
+  <para>
+   The subscriber database behaves in the same way as any other PostgreSQL
+   instance and can be used as a publisher for other databases by defining its
+   own publications.
+  </para>
+
+  <para>
+   A subscriber node may have multiple subscriptions if desired.  It is
+   possible to define multiple subscriptions between a single
+   publisher-subscriber pair, in which case extra care must be taken to ensure
+   that the subscribed publication objects don't overlap.
+  </para>
+
+  <para>
+   Each subscription will receive changes via one replication slot (see
+   <xref linkend="streaming-replication-slots">).
+  </para>
+
+  <para>
+   Subscriptions are not dumped by <command>pg_dump</command> by default but
+   can be requested using the command-line
+   option <option>--subscriptions</option>.
+  </para>
+
+  <para>
+   The subscription is added using <xref linkend="sql-createsubscription"> and
+   can be stopped/resumed at any time using the
+   <xref linkend="sql-altersubscription"> command and removed using
+   <xref linkend="sql-dropsubscription">.
+  </para>
+
+  <para>
+   When a subscription is dropped and recreated, the synchronization
+   information is lost.  This means that the data has to be resynchronized
+   afterwards.
+  </para>
+
+  <para>
+   The schema definitions are not replicated and the published tables must
+   exist on the subscriber for replication to work.  Only regular tables may be
+   the target of replication.  For example, you can't replicate to a view.
+  </para>
+
+  <para>
+   The tables are matched between the publisher and the subscriber using the
+   fully qualified table name.  Replication to differently-named tables on the
+   subscriber is not supported.
+  </para>
+
+  <para>
+   Columns of a table are also matched by name.  A different order of columns
+   in the target table is allowed, but the column types have to match.
+  </para>
+ </sect1>
+
+ <sect1 id="logical-replication-conflicts">
+  <title>Conflicts</title>
+
+  <para>
+   Logical replication behaves similarly to normal DML operations in that
+   the data will be updated even if it was changed locally on the subscriber
+   node.  If the incoming data violates any constraints the replication will
+   stop.  This is referred to as a <firstterm>conflict</firstterm>.  When
+   replicating <command>UPDATE</command> or <command>DELETE</command>
+   operations, missing data will not produce a conflict and such operations
+   will simply be skipped.
+  </para>
+
+  <para>
+   A conflict will produce an error and will stop the replication; it must be
+   resolved manually by the user.  Details about the conflict can be found in
+   the subscriber's server log.
+  </para>
+
+  <para>
+   The resolution can be done either by changing data on the subscriber so
+   that it does not conflict with the incoming change or by skipping the
+   transaction that conflicts with the existing data.  The transaction can be
+   skipped by calling the <link linkend="pg-replication-origin-advance">
+   <function>pg_replication_origin_advance()</function></link> function with
+   a <parameter>node_name</parameter> corresponding to the subscription name.
+   The current position of origins can be seen in the
+   <link linkend="view-pg-replication-origin-status">
+   <structname>pg_replication_origin_status</structname></link> system view.
+  </para>
+ </sect1>
+
+ <sect1 id="logical-replication-architecture">
+  <title>Architecture</title>
+
+  <para>
+   Logical replication starts by copying a snapshot of the data on the
+   publisher database.  Once that is done, changes on the publisher are sent
+   to the subscriber as they occur in real time.  The subscriber applies data
+   in the order in which commits were made on the publisher so that
+   transactional consistency is guaranteed for the publications within any
+   single subscription.
+  </para>
+
+  <para>
+   Logical replication is built with an architecture similar to physical
+   streaming replication (see <xref linkend="streaming-replication">).  It is
+   implemented by <quote>walsender</quote> and the <quote>apply</quote>
+   processes.  The walsender starts logical decoding (described
+   in <xref linkend="logicaldecoding">) of the WAL and loads the standard
+   logical decoding plugin (pgoutput).  The plugin transforms the changes read
+   from WAL to the logical replication protocol
+   (see <xref linkend="protocol-logical-replication">) and filters the data
+   according to the publication specification.  The data is then continuously
+   transferred using the streaming replication protocol to the apply worker,
+   which maps the data to local tables and applies the individual changes as
+   they are received in exact transactional order.
+  </para>
+
+  <para>
+   The apply process on the subscriber database always runs with
+   <varname>session_replication_role</varname> set
+   to <literal>replica</literal>, which produces the usual effects on triggers
+   and constraints.
+  </para>
+ </sect1>
+
+  <sect1 id="logical-replication-monitoring">
+  <title>Monitoring</title>
+
+  <para>
+   Because logical replication is based on a similar architecture to
+   <link linkend="streaming-replication">physical streaming replication</link>,
+   monitoring on a publication node is very similar to monitoring of a
+   physical replication master
+   (see <xref linkend="streaming-replication-monitoring">).
+  </para>
+
+  <para>
+   Monitoring information about subscriptions is visible in
+   <link linkend="pg-stat-subscription"><literal>pg_stat_subscription</literal></link>.
+   This view contains one row for every subscription worker.  A subscription
+   can have zero or more active subscription workers depending on its state.
+  </para>
+
+  <para>
+   Normally, there is a single apply process running for an enabled
+   subscription.  A disabled subscription or a crashed subscription will have
+   zero rows in this view.
+  </para>
+ </sect1>
+
+ <sect1 id="logical-replication-security">
+  <title>Security</title>
+
+  <para>
+   Logical replication connections occur in the same way as physical streaming
+   replication.  It requires access to be specifically given using
+   <filename>pg_hba.conf</filename>.  The role used for the replication
+   connection must have the <literal>REPLICATION</literal> attribute.  This
+   gives a role access to both logical and physical replication.
+  </para>
+
+  <para>
+   To create a publication, the user must have the <literal>CREATE</literal>
+   privilege in the database.
+  </para>
+
+  <para>
+   To create a subscription, the user must be a superuser.
+  </para>
+
+  <para>
+   The subscription apply process will run in the local database with the
+   privileges of a superuser.
+  </para>
+
+  <para>
+   Privileges are only checked once at the start of a replication connection.
+   They are not re-checked as each change record is read from the publisher,
+   nor are they re-checked for each change when applied.
+  </para>
+ </sect1>
+
+ <sect1 id="logical-replication-config">
+  <title>Configuration Settings</title>
+
+  <para>
+   Logical replication requires several configuration options to be set.
+  </para>
+
+  <para>
+   On the publisher side, <varname>wal_level</varname> must be set to
+   <literal>logical</literal>, and <varname>max_replication_slots</varname>
+   has to be set to at least the number of subscriptions expected to connect.
+   In addition, <varname>max_wal_senders</varname> should be set to at least
+   the value of <varname>max_replication_slots</varname> plus the number of
+   physical replicas that are connected at the same time.
+  </para>
+
+  <para>
+   The subscriber also requires the <varname>max_replication_slots</varname>
+   to be set.  In this case it should be set to at least the number of
+   subscriptions that will be added to the subscriber.
+   <varname>max_logical_replication_workers</varname> has to be set to at
+   least the number of subscriptions.  Additionally,
+   <varname>max_worker_processes</varname> may need to be adjusted to
+   accommodate replication workers, at least
+   (<varname>max_logical_replication_workers</varname>
+   + <literal>1</literal>).  Note that some extensions and parallel queries
+   also take worker slots from <varname>max_worker_processes</varname>.
+  </para>
+ </sect1>
+
+ <sect1 id="logical-replication-quick-setup">
+  <title>Quick Setup</title>
+
+  <para>
+   First set the configuration options in <filename>postgresql.conf</filename>:
+<programlisting>
+wal_level = logical
+</programlisting>
+   The other required settings have default values that are sufficient for a
+   basic setup.
+  </para>
+
+  <para>
+   <filename>pg_hba.conf</filename> needs to be adjusted to allow replication
+   (the values here depend on your actual network configuration and the user
+   you want to use for connecting):
+<programlisting>
+host    replication     repuser     0.0.0.0/0       md5
+</programlisting>
+  </para>
+
+  <para>
+   Then on the publisher database:
+<programlisting>
+CREATE PUBLICATION mypub FOR TABLE users, departments;
+</programlisting>
+  </para>
+
+  <para>
+   And on the subscriber database:
+<programlisting>
+CREATE SUBSCRIPTION mysub CONNECTION 'dbname=foo host=bar user=repuser' PUBLICATION mypub;
+</programlisting>
+  </para>
+
+  <para>
+   The above will start the replication process of changes to
+   <literal>users</literal> and <literal>departments</literal> tables.
+  </para>
+ </sect1>
+</chapter>
index 1545f03656c94f76ec2cb5c4b703300f8da88666..01fad3870f678350017d204687cdba457af3937f 100644 (file)
@@ -308,6 +308,14 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       </entry>
      </row>
 
+     <row>
+      <entry><structname>pg_stat_subscription</><indexterm><primary>pg_stat_subscription</primary></indexterm></entry>
+      <entry>At least one row per subscription, showing information about
+       the subscription workers.
+       See <xref linkend="pg-stat-subscription"> for details.
+      </entry>
+     </row>
+
      <row>
       <entry><structname>pg_stat_ssl</><indexterm><primary>pg_stat_ssl</primary></indexterm></entry>
       <entry>One row per connection (regular and replication), showing information about
@@ -1545,6 +1553,72 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
    connected server.
   </para>
 
+  <table id="pg-stat-subscription" xreflabel="pg_stat_subscription">
+   <title><structname>pg_stat_subscription</structname> View</title>
+   <tgroup cols="3">
+    <thead>
+    <row>
+      <entry>Column</entry>
+      <entry>Type</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+   <tbody>
+    <row>
+     <entry><structfield>subid</></entry>
+     <entry><type>oid</></entry>
+     <entry>OID of the subscription</entry>
+    </row>
+    <row>
+     <entry><structfield>subname</></entry>
+     <entry><type>text</></entry>
+     <entry>Name of the subscription</entry>
+    </row>
+    <row>
+     <entry><structfield>pid</></entry>
+     <entry><type>integer</></entry>
+     <entry>Process ID of the subscription worker process</entry>
+    </row>
+    <row>
+     <entry><structfield>received_lsn</></entry>
+     <entry><type>pg_lsn</></entry>
+     <entry>Last transaction log position received, the initial value of
+      this field being 0</entry>
+    </row>
+    <row>
+     <entry><structfield>last_msg_send_time</></entry>
+     <entry><type>timestamp with time zone</></entry>
+     <entry>Send time of last message received from origin WAL sender</entry>
+    </row>
+    <row>
+     <entry><structfield>last_msg_receipt_time</></entry>
+     <entry><type>timestamp with time zone</></entry>
+     <entry>Receipt time of last message received from origin WAL sender
+     </entry>
+    </row>
+    <row>
+     <entry><structfield>latest_end_lsn</></entry>
+     <entry><type>pg_lsn</></entry>
+     <entry>Last transaction log position reported to origin WAL sender
+     </entry>
+    </row>
+    <row>
+     <entry><structfield>latest_end_time</></entry>
+     <entry><type>timestamp with time zone</></entry>
+     <entry>Time of last transaction log position reported to origin WAL
+      sender</entry>
+    </row>
+   </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   The <structname>pg_stat_subscription</structname> view will contain one
+   row per subscription for the main worker (with null PID if the worker is
+   not running).
+  </para>
+
   <table id="pg-stat-ssl-view" xreflabel="pg_stat_ssl">
    <title><structname>pg_stat_ssl</structname> View</title>
    <tgroup cols="3">
index 9143917c490b753e832bb301923b8e2f691c5ea7..4e169d1b18961c98b55ac023374ebed737f3e62b 100644 (file)
   &monitoring;
   &diskusage;
   &wal;
+  &logical-replication;
   &regress;
 
  </part>
index 9ba147cae5e7917e06fdc11ad92ae905ce17fee2..5f89db5857021d6dba0dcf460f2d850a96ed9813 100644 (file)
@@ -2122,6 +2122,119 @@ The commands accepted in walsender mode are:
 
 </sect1>
 
+<sect1 id="protocol-logical-replication">
+ <title>Logical Streaming Replication Protocol</title>
+
+ <para>
+  This section describes the logical replication protocol, which is the message
+  flow started by the <literal>START_REPLICATION</literal>
+  <literal>SLOT</literal> <replaceable class="parameter">slot_name</>
+  <literal>LOGICAL</literal> replication command.
+ </para>
+
+ <para>
+  The logical streaming replication protocol builds on the primitives of
+  the physical streaming replication protocol.
+ </para>
+
+ <sect2 id="protocol-logical-replication-params">
+  <title>Logical Streaming Replication Parameters</title>
+
+  <para>
+   The logical replication <literal>START_REPLICATION</literal> command
+   accepts the following parameters:
+
+   <variablelist>
+    <varlistentry>
+     <term>
+      proto_version
+     </term>
+     <listitem>
+      <para>
+       Protocol version. Currently only version <literal>1</literal> is
+       supported.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term>
+      publication_names
+     </term>
+     <listitem>
+      <para>
+       Comma separated list of publication names for which to subscribe
+       (receive changes). The individual publication names are treated
+       as standard object names and can be quoted as needed.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+
+  </para>
+ </sect2>
+
+ <sect2 id="protocol-logical-messages">
+  <title>Logical Replication Protocol Messages</title>
+
+  <para>
+   The individual protocol messages are discussed in the following
+   subsections. Individual messages are described in the
+   <xref linkend="protocol-logicalrep-message-formats"> section.
+  </para>
+
+  <para>
+   All top-level protocol messages begin with a message type byte.
+   While represented in code as a character, this is a signed byte with no
+   associated encoding.
+  </para>
+
+  <para>
+   Since the streaming replication protocol supplies a message length there
+   is no need for top-level protocol messages to embed a length in their
+   header.
+  </para>
+
+ </sect2>
+
+ <sect2 id="protocol-logical-messages-flow">
+  <title>Logical Replication Protocol Message Flow</title>
+
+  <para>
+   With the exception of the <literal>START_REPLICATION</literal> command and
+   the replay progress messages, all information flows only from the backend
+   to the frontend.
+  </para>
+
+  <para>
+   The logical replication protocol sends individual transactions one by one.
+   This means that all messages between a pair of Begin and Commit messages
+   belong to the same transaction.
+  </para>
+
+  <para>
+   Every sent transaction contains zero or more DML messages (Insert,
+   Update, Delete). In case of a cascaded setup it can also contain Origin
+   messages. The Origin message indicates that the transaction originated on
+   a different replication node. Since a replication node in the scope of logical
+   replication protocol can be pretty much anything, the only identifier
+   is the origin name. It's downstream's responsibility to handle this as
+   needed (if needed). The Origin message is always sent before any DML
+   messages in the transaction.
+  </para>
+
+  <para>
+   Every DML message contains an arbitrary relation ID, which can be mapped to
+   an ID in the Relation messages. The Relation messages describe the schema of the
+   given relation. The Relation message is sent for a given relation either
+   because it is the first time we send a DML message for given relation in the
+   current session or because the relation definition has changed since the
+   last Relation message was sent for it. The protocol assumes that the client
+   is capable of caching the metadata for as many relations as needed.
+  </para>
+ </sect2>
+</sect1>
+
 <sect1 id="protocol-message-types">
 <title>Message Data Types</title>
 
@@ -5149,6 +5262,614 @@ not line breaks.
 
 </sect1>
 
+<sect1 id="protocol-logicalrep-message-formats">
+<title>Logical Replication Message Formats</title>
+
+<para>
+This section describes the detailed format of each logical replication message.
+These messages are returned either by the replication slot SQL interface or are
+sent by a walsender. In case of a walsender they are encapsulated inside the replication
+protocol WAL messages as described in <xref linkend="protocol-replication">
+and generally obey the same message flow as physical replication.
+</para>
+
+<variablelist>
+
+<varlistentry>
+<term>
+Begin
+</term>
+<listitem>
+<para>
+
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('B')
+</term>
+<listitem>
+<para>
+                Identifies the message as a begin message.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int64
+</term>
+<listitem>
+<para>
+                The final LSN of the transaction.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int64
+</term>
+<listitem>
+<para>
+                Commit timestamp of the transaction. The value is in number
+                of microseconds since PostgreSQL epoch (2000-01-01).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int32
+</term>
+<listitem>
+<para>
+                Xid of the transaction.
+</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+Commit
+</term>
+<listitem>
+<para>
+
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('C')
+</term>
+<listitem>
+<para>
+                Identifies the message as a commit message.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int64
+</term>
+<listitem>
+<para>
+                The LSN of the commit.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int64
+</term>
+<listitem>
+<para>
+                The end LSN of the transaction.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int64
+</term>
+<listitem>
+<para>
+                Commit timestamp of the transaction. The value is in number
+                of microseconds since PostgreSQL epoch (2000-01-01).
+</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+Origin
+</term>
+<listitem>
+<para>
+
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('O')
+</term>
+<listitem>
+<para>
+                Identifies the message as an origin message.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int64
+</term>
+<listitem>
+<para>
+                The LSN of the commit on the origin server.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        String
+</term>
+<listitem>
+<para>
+                Name of the origin.
+</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+</para>
+
+<para>
+  Note that there can be multiple Origin messages inside a single transaction.
+</para>
+
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+Relation
+</term>
+<listitem>
+<para>
+
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('R')
+</term>
+<listitem>
+<para>
+                Identifies the message as a relation message.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int32
+</term>
+<listitem>
+<para>
+                ID of the relation.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        String
+</term>
+<listitem>
+<para>
+                Namespace (empty string for <literal>pg_catalog</literal>).
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        String
+</term>
+<listitem>
+<para>
+                Relation name.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        Int8
+</term>
+<listitem>
+<para>
+                Replica identity setting for the relation (same as
+                <structfield>relreplident</structfield> in <structname>pg_class</structname>).
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        Int16
+</term>
+<listitem>
+<para>
+                Number of columns.
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+        Next, the following message part appears for each column:
+<variablelist>
+<varlistentry>
+<term>
+        Int8
+</term>
+<listitem>
+<para>
+                Flags for the column. Currently can be either 0 for no flags
+                or 1 which marks the column as part of the key.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        String
+</term>
+<listitem>
+<para>
+                Name of the column.
+</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+Insert
+</term>
+<listitem>
+<para>
+
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('I')
+</term>
+<listitem>
+<para>
+                Identifies the message as an insert message.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int32
+</term>
+<listitem>
+<para>
+                ID of the relation corresponding to the ID in the relation
+                message.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Byte1('N')
+</term>
+<listitem>
+<para>
+                Identifies the following TupleData message as a new tuple.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        TupleData
+</term>
+<listitem>
+<para>
+                TupleData message part representing the contents of the new tuple.
+</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+Update
+</term>
+<listitem>
+<para>
+
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('U')
+</term>
+<listitem>
+<para>
+                Identifies the message as an update message.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int32
+</term>
+<listitem>
+<para>
+                ID of the relation corresponding to the ID in the relation
+                message.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        Byte1('K')
+</term>
+<listitem>
+<para>
+                Identifies the following TupleData submessage as a key.
+                This field is optional and is only present if
+                the update changed data in any of the column(s) that are
+                part of the REPLICA IDENTITY index.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        Byte1('O')
+</term>
+<listitem>
+<para>
+                Identifies the following TupleData submessage as an old tuple.
+                This field is optional and is only present if the table in which
+                the update happened has REPLICA IDENTITY set to FULL.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        TupleData
+</term>
+<listitem>
+<para>
+                TupleData message part representing the contents of the old tuple
+                or primary key. Only present if the previous 'O' or 'K' part
+                is present.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        Byte1('N')
+</term>
+<listitem>
+<para>
+                Identifies the following TupleData message as a new tuple.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        TupleData
+</term>
+<listitem>
+<para>
+                TupleData message part representing the contents of a new tuple.
+</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+</para>
+
+<para>
+    The Update message may contain either a 'K' message part or an 'O' message part
+    or neither of them, but never both of them.
+</para>
+
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+Delete
+</term>
+<listitem>
+<para>
+
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('D')
+</term>
+<listitem>
+<para>
+                Identifies the message as a delete message.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int32
+</term>
+<listitem>
+<para>
+                ID of the relation corresponding to the ID in the relation
+                message.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        Byte1('K')
+</term>
+<listitem>
+<para>
+                Identifies the following TupleData submessage as a key.
+                This field is present if the table in which the delete has
+                happened uses an index as REPLICA IDENTITY.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        Byte1('O')
+</term>
+<listitem>
+<para>
+                Identifies the following TupleData message as an old tuple.
+                This field is present if the table in which the delete has
+                happened has REPLICA IDENTITY set to FULL.
+</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term>
+        TupleData
+</term>
+<listitem>
+<para>
+                TupleData message part representing the contents of the old tuple
+                or primary key, depending on the previous field.
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+</para>
+
+<para>
+    The Delete message may contain either a 'K' message part or an 'O' message part,
+    but never both of them.
+</para>
+
+</listitem>
+</varlistentry>
+
+</variablelist>
+
+<para>
+
+The following message parts are shared by the above messages.
+
+</para>
+
+<variablelist>
+
+<varlistentry>
+<term>
+TupleData
+</term>
+<listitem>
+<para>
+
+<variablelist>
+<varlistentry>
+<term>
+        Int16
+</term>
+<listitem>
+<para>
+                Number of columns.
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+        Next, one of the following submessages appears for each column:
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('n')
+</term>
+<listitem>
+<para>
+                Identifies the data as NULL value.
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+        Or
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('u')
+</term>
+<listitem>
+<para>
+                Identifies unchanged TOASTed value (the actual value is not
+                sent).
+</para>
+</listitem>
+</varlistentry>
+</variablelist>
+        Or
+<variablelist>
+<varlistentry>
+<term>
+        Byte1('t')
+</term>
+<listitem>
+<para>
+                Identifies the data as text formatted value.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        Int32
+</term>
+<listitem>
+<para>
+                Length of the column value.
+</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>
+        String
+</term>
+<listitem>
+<para>
+                The text value.
+</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+
+</sect1>
+
 <sect1 id="protocol-changes">
 <title>Summary of Changes since Protocol 2.0</title>
 
index 77667bdebd1e68028658a0274da3ad32e89b4def..0d09f81ccc75a732953742e2766d6a93fde9e0ea 100644 (file)
@@ -26,11 +26,13 @@ Complete list of usable sgml source files in this directory.
 <!ENTITY alterOperatorClass SYSTEM "alter_opclass.sgml">
 <!ENTITY alterOperatorFamily SYSTEM "alter_opfamily.sgml">
 <!ENTITY alterPolicy        SYSTEM "alter_policy.sgml">
+<!ENTITY alterPublication   SYSTEM "alter_publication.sgml">
 <!ENTITY alterRole          SYSTEM "alter_role.sgml">
 <!ENTITY alterRule          SYSTEM "alter_rule.sgml">
 <!ENTITY alterSchema        SYSTEM "alter_schema.sgml">
 <!ENTITY alterServer        SYSTEM "alter_server.sgml">
 <!ENTITY alterSequence      SYSTEM "alter_sequence.sgml">
+<!ENTITY alterSubscription  SYSTEM "alter_subscription.sgml">
 <!ENTITY alterSystem        SYSTEM "alter_system.sgml">
 <!ENTITY alterTable         SYSTEM "alter_table.sgml">
 <!ENTITY alterTableSpace    SYSTEM "alter_tablespace.sgml">
@@ -72,11 +74,13 @@ Complete list of usable sgml source files in this directory.
 <!ENTITY createOperatorClass SYSTEM "create_opclass.sgml">
 <!ENTITY createOperatorFamily SYSTEM "create_opfamily.sgml">
 <!ENTITY createPolicy       SYSTEM "create_policy.sgml">
+<!ENTITY createPublication  SYSTEM "create_publication.sgml">
 <!ENTITY createRole         SYSTEM "create_role.sgml">
 <!ENTITY createRule         SYSTEM "create_rule.sgml">
 <!ENTITY createSchema       SYSTEM "create_schema.sgml">
 <!ENTITY createSequence     SYSTEM "create_sequence.sgml">
 <!ENTITY createServer       SYSTEM "create_server.sgml">
+<!ENTITY createSubscription SYSTEM "create_subscription.sgml">
 <!ENTITY createTable        SYSTEM "create_table.sgml">
 <!ENTITY createTableAs      SYSTEM "create_table_as.sgml">
 <!ENTITY createTableSpace   SYSTEM "create_tablespace.sgml">
@@ -116,11 +120,13 @@ Complete list of usable sgml source files in this directory.
 <!ENTITY dropOperatorFamily  SYSTEM "drop_opfamily.sgml">
 <!ENTITY dropOwned          SYSTEM "drop_owned.sgml">
 <!ENTITY dropPolicy         SYSTEM "drop_policy.sgml">
+<!ENTITY dropPublication    SYSTEM "drop_publication.sgml">
 <!ENTITY dropRole           SYSTEM "drop_role.sgml">
 <!ENTITY dropRule           SYSTEM "drop_rule.sgml">
 <!ENTITY dropSchema         SYSTEM "drop_schema.sgml">
 <!ENTITY dropSequence       SYSTEM "drop_sequence.sgml">
 <!ENTITY dropServer         SYSTEM "drop_server.sgml">
+<!ENTITY dropSubscription   SYSTEM "drop_subscription.sgml">
 <!ENTITY dropTable          SYSTEM "drop_table.sgml">
 <!ENTITY dropTableSpace     SYSTEM "drop_tablespace.sgml">
 <!ENTITY dropTransform      SYSTEM "drop_transform.sgml">
diff --git a/doc/src/sgml/ref/alter_publication.sgml b/doc/src/sgml/ref/alter_publication.sgml
new file mode 100644 (file)
index 0000000..47d83b8
--- /dev/null
@@ -0,0 +1,139 @@
+<!--
+doc/src/sgml/ref/alter_publication.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-ALTERPUBLICATION">
+ <indexterm zone="sql-alterpublication">
+  <primary>ALTER PUBLICATION</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>ALTER PUBLICATION</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>ALTER PUBLICATION</refname>
+  <refpurpose>change the definition of a publication</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+ALTER PUBLICATION <replaceable class="PARAMETER">name</replaceable> WITH ( <replaceable class="PARAMETER">option</replaceable> [, ... ] )
+
+<phrase>where <replaceable class="PARAMETER">option</replaceable> can be:</phrase>
+
+      PUBLISH INSERT | NOPUBLISH INSERT
+    | PUBLISH UPDATE | NOPUBLISH UPDATE
+    | PUBLISH DELETE | NOPUBLISH DELETE
+
+ALTER PUBLICATION <replaceable class="PARAMETER">name</replaceable> OWNER TO { <replaceable>new_owner</replaceable> | CURRENT_USER | SESSION_USER }
+ALTER PUBLICATION <replaceable class="PARAMETER">name</replaceable> ADD TABLE <replaceable class="PARAMETER">table_name</replaceable> [, ...]
+ALTER PUBLICATION <replaceable class="PARAMETER">name</replaceable> SET TABLE <replaceable class="PARAMETER">table_name</replaceable> [, ...]
+ALTER PUBLICATION <replaceable class="PARAMETER">name</replaceable> DROP TABLE <replaceable class="PARAMETER">table_name</replaceable> [, ...]
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   The first variant of this command listed in the synopsis can change
+   all of the publication properties specified in
+   <xref linkend="sql-createpublication">.  Properties not mentioned in the
+   command retain their previous settings.  Database superusers can change any
+   of these settings for any publication.
+  </para>
+
+  <para>
+   To alter the owner, you must also be a direct or indirect member of the
+   new owning role. The new owner has to be a superuser
+  </para>
+
+  <para>
+   The other variants of this command deal with the table membership of the
+   publication.  The <literal>SET TABLE</literal> clause will replace the
+   list of tables in the publication with the specified one.
+   The <literal>ADD TABLE</literal> and
+   <literal>DROP TABLE</literal> clauses will add and remove one or more tables from
+   the publication.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="parameter">name</replaceable></term>
+    <listitem>
+     <para>
+      The name of an existing publication whose definition is to be altered.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>PUBLISH INSERT</literal></term>
+    <term><literal>NOPUBLISH INSERT</literal></term>
+    <term><literal>PUBLISH UPDATE</literal></term>
+    <term><literal>NOPUBLISH UPDATE</literal></term>
+    <term><literal>PUBLISH DELETE</literal></term>
+    <term><literal>NOPUBLISH DELETE</literal></term>
+    <listitem>
+     <para>
+      These clauses alter properties originally set by
+      <xref linkend="SQL-CREATEPUBLICATION">.  See there for more information.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">table_name</replaceable></term>
+    <listitem>
+     <para>
+      Name of an existing table.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   Change the publication to not publish inserts:
+<programlisting>
+ALTER PUBLICATION noinsert WITH (NOPUBLISH INSERT);
+</programlisting>
+  </para>
+
+  <para>
+   Add some tables to the publication:
+<programlisting>
+ALTER PUBLICATION mypublication ADD TABLE users, departments;
+</programlisting>
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   <command>ALTER PUBLICATION</command> is a <productname>PostgreSQL</>
+   extension.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-createpublication"></member>
+   <member><xref linkend="sql-droppublication"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml
new file mode 100644 (file)
index 0000000..032ecbb
--- /dev/null
@@ -0,0 +1,139 @@
+<!--
+doc/src/sgml/ref/alter_subscription.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-ALTERSUBSCRIPTION">
+ <indexterm zone="sql-altersubscription">
+  <primary>ALTER SUBSCRIPTION</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>ALTER SUBSCRIPTION</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>ALTER SUBSCRIPTION</refname>
+  <refpurpose>change the definition of a subscription</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> WITH ( <replaceable class="PARAMETER">option</replaceable> [, ... ] )
+
+<phrase>where <replaceable class="PARAMETER">option</replaceable> can be:</phrase>
+
+  SLOT NAME = slot_name
+
+ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> OWNER TO { <replaceable>new_owner</replaceable> | CURRENT_USER | SESSION_USER }
+ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> CONNECTION 'conninfo'
+ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> SET PUBLICATION publication_name [, ...]
+ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> ENABLE
+ALTER SUBSCRIPTION <replaceable class="PARAMETER">name</replaceable> DISABLE
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>ALTER SUBSCRIPTION</command> can change most of the subscription
+   properties that can be specified
+   in <xref linkend="sql-createsubscription">.
+  </para>
+
+  <para>
+   To alter the owner, you must also be a direct or indirect member of the
+   new owning role. The new owner has to be a superuser.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="parameter">name</replaceable></term>
+    <listitem>
+     <para>
+      The name of a subscription whose properties are to be altered.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>CONNECTION '<replaceable class="parameter">conninfo</replaceable>'</literal></term>
+    <term><literal>SET PUBLICATION <replaceable class="parameter">publication_name</replaceable></literal></term>
+    <term><literal>SLOT NAME = <replaceable class="parameter">slot_name</replaceable></literal></term>
+    <listitem>
+     <para>
+      These clauses alter properties originally set by
+      <xref linkend="SQL-CREATESUBSCRIPTION">.  See there for more
+      information.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>ENABLE</literal></term>
+    <listitem>
+     <para>
+      Enables the previously disabled subscription, starting the logical
+      replication worker at the end of transaction.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>DISABLE</literal></term>
+    <listitem>
+     <para>
+      Disables the running subscription, stopping the logical replication
+      worker at the end of transaction.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   Change the publication subscribed by a subscription to
+   <literal>insert_only</literal>:
+<programlisting>
+ALTER SUBSCRIPTION mysub SET PUBLICATION insert_only;
+</programlisting>
+  </para>
+
+  <para>
+   Disable (stop) the subscription:
+<programlisting>
+ALTER SUBSCRIPTION mysub DISABLE;
+</programlisting>
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   <command>ALTER SUBSCRIPTION</command> is a <productname>PostgreSQL</>
+   extension.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-createsubscription"></member>
+   <member><xref linkend="sql-dropsubscription"></member>
+   <member><xref linkend="sql-createpublication"></member>
+   <member><xref linkend="sql-alterpublication"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
new file mode 100644 (file)
index 0000000..995f2bc
--- /dev/null
@@ -0,0 +1,206 @@
+<!--
+doc/src/sgml/ref/create_publication.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-CREATEPUBLICATION">
+ <indexterm zone="sql-createpublication">
+  <primary>CREATE PUBLICATION</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>CREATE PUBLICATION</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>CREATE PUBLICATION</refname>
+  <refpurpose>define a new publication</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+CREATE PUBLICATION <replaceable class="parameter">name</replaceable>
+    [ FOR TABLE <replaceable class="parameter">table_name</replaceable> [, ...]
+      | FOR ALL TABLES ]
+    [ WITH ( <replaceable class="parameter">option</replaceable> [, ... ] ) ]
+
+<phrase>where <replaceable class="parameter">option</replaceable> can be:</phrase>
+
+      PUBLISH INSERT | NOPUBLISH INSERT
+    | PUBLISH UPDATE | NOPUBLISH UPDATE
+    | PUBLISH DELETE | NOPUBLISH DELETE
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>CREATE PUBLICATION</command> adds a new publication
+   into the current database.  The publication name must be distinct from
+   the name of any existing publication in the current database.
+  </para>
+
+  <para>
+   A publication is essentially a group of tables whose data changes are
+   intended to be replicated through logical replication.  See
+   <xref linkend="logical-replication-publication"> for details about how
+   publications fit into the logical replication setup.
+   </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="parameter">name</replaceable></term>
+    <listitem>
+     <para>
+      The name of the new publication.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>FOR TABLE</literal></term>
+    <listitem>
+     <para>
+      Specifies a list of tables to add to the publication.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>FOR ALL TABLES</literal></term>
+    <listitem>
+     <para>
+      Marks the publication as one that replicates changes for all tables in
+      the database, including tables created in the future.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>PUBLISH INSERT</literal></term>
+    <term><literal>NOPUBLISH INSERT</literal></term>
+    <listitem>
+     <para>
+      These clauses determine whether the new publication will send
+      the <command>INSERT</command> operations to the subscribers.
+      <literal>PUBLISH INSERT</literal> is the default.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>PUBLISH UPDATE</literal></term>
+    <term><literal>NOPUBLISH UPDATE</literal></term>
+    <listitem>
+     <para>
+      These clauses determine whether the new publication will send
+      the <command>UPDATE</command> operations to the subscribers.
+      <literal>PUBLISH UPDATE</literal> is the default.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>PUBLISH DELETE</literal></term>
+    <term><literal>NOPUBLISH DELETE</literal></term>
+    <listitem>
+     <para>
+      These clauses determine whether the new publication will send
+      the <command>DELETE</command> operations to the subscribers.
+      <literal>PUBLISH DELETE</literal> is the default.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Notes</title>
+
+  <para>
+   If neither <literal>FOR TABLE</literal> nor <literal>FOR ALL
+   TABLES</literal> is specified, then the publication starts out with an
+   empty set of tables.  That is useful if tables are to be added later.
+  </para>
+
+  <para>
+   The creation of a publication does not start replication.  It only defines
+   a grouping and filtering logic for future subscribers.
+  </para>
+
+  <para>
+   To create a publication, the invoking user must have the
+   <literal>CREATE</> privilege for the current database.
+   (Of course, superusers bypass this check.)
+  </para>
+
+  <para>
+   To add a table to a publication, the invoking user must have
+   <command>SELECT</command> privilege on the given table.  The
+   <command>FOR ALL TABLES</command> clause requires superuser.
+  </para>
+
+  <para>
+   The tables added to a publication that publishes <command>UPDATE</command>
+   and/or <command>DELETE</command> operations must have
+   <literal>REPLICA IDENTITY</> defined.  Otherwise those operations will be
+   disallowed on those tables.
+  </para>
+
+  <para>
+   For an <command>INSERT ... ON CONFLICT</> command, the publication will
+   publish the operation that actually results from the command.  So depending
+   on the outcome, it may be published as either <command>INSERT</command> or
+   <command>UPDATE</command>, or it may not be published at all.
+  </para>
+
+  <para>
+   <command>TRUNCATE</command> and other <acronym>DDL</acronym> operations
+   are not published.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   Create a simple publication that just publishes all DML for tables in it:
+<programlisting>
+CREATE PUBLICATION mypublication;
+</programlisting>
+  </para>
+
+  <para>
+   Create an insert-only publication:
+<programlisting>
+CREATE PUBLICATION insert_only WITH (NOPUBLISH UPDATE, NOPUBLISH DELETE);
+</programlisting>
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   <command>CREATE PUBLICATION</command> is a <productname>PostgreSQL</>
+   extension.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-alterpublication"></member>
+   <member><xref linkend="sql-droppublication"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml
new file mode 100644 (file)
index 0000000..40d08b3
--- /dev/null
@@ -0,0 +1,176 @@
+<!--
+doc/src/sgml/ref/create_subscription.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-CREATESUBSCRIPTION">
+ <indexterm zone="sql-createsubscription">
+  <primary>CREATE SUBSCRIPTION</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>CREATE SUBSCRIPTION</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>CREATE SUBSCRIPTION</refname>
+  <refpurpose>define a new subscription</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+CREATE SUBSCRIPTION <replaceable class="PARAMETER">subscription_name</replaceable> CONNECTION 'conninfo' PUBLICATION { publication_name [, ...] } [ WITH ( <replaceable class="PARAMETER">option</replaceable> [, ... ] ) ]
+
+<phrase>where <replaceable class="PARAMETER">option</replaceable> can be:</phrase>
+
+      ENABLED | DISABLED
+    | CREATE SLOT | NOCREATE SLOT
+    | SLOT NAME = slot_name
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>CREATE SUBSCRIPTION</command> adds a new subscription for the
+   current database.  The subscription name must be distinct from the name of
+   any existing subscription in the database.
+  </para>
+
+  <para>
+   The subscription represents a replication connection to the publisher.  As
+   such, this command not only adds definitions in the local catalogs but
+   also creates a replication slot on the publisher.
+  </para>
+
+  <para>
+   A logical replication worker will be started to replicate data for the new
+   subscription at the commit of the transaction where this command is run.
+  </para>
+
+  <para>
+   Additional info about subscriptions and logical replication as a whole
+   is available at <xref linkend="logical-replication-subscription"> and
+   <xref linkend="logical-replication">.
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="parameter">subscription_name</replaceable></term>
+    <listitem>
+     <para>
+      The name of the new subscription.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>CONNECTION '<replaceable class="parameter">conninfo</replaceable>'</literal></term>
+    <listitem>
+     <para>
+      The connection string to the publisher.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>PUBLICATION <replaceable class="parameter">publication_name</replaceable></literal></term>
+    <listitem>
+     <para>
+      Name(s) of the publications on the publisher to subscribe to.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>ENABLED</literal></term>
+    <term><literal>DISABLED</literal></term>
+    <listitem>
+     <para>
+      Specifies whether the subscription should be actively replicating or
+      if it should be just set up but not started yet.  Note that the
+      replication slot as described above is created in either case.
+      <literal>ENABLED</literal> is the default.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>CREATE SLOT</literal></term>
+    <term><literal>NOCREATE SLOT</literal></term>
+    <listitem>
+     <para>
+      Specifies whether the command should create the replication slot on the
+      publisher. <literal>CREATE SLOT</literal> is the default.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>SLOT NAME = <replaceable class="parameter">slot_name</replaceable></literal></term>
+    <listitem>
+     <para>
+      Name of the replication slot to use. The default behavior is to use
+      <literal>subscription_name</> for the slot name.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   Create a subscription to a remote server that replicates tables in
+   the publications <literal>mypublication</literal> and
+   <literal>insert_only</literal> and starts replicating immediately on
+   commit:
+<programlisting>
+CREATE SUBSCRIPTION mysub
+         CONNECTION 'host=192.168.1.50 port=5432 user=foo dbname=foodb password=foopass'
+        PUBLICATION mypublication, insert_only;
+</programlisting>
+  </para>
+
+  <para>
+   Create a subscription to a remote server that replicates tables in
+   the <literal>insert_only</literal> publication and does not start replicating
+   until enabled at a later time:
+<programlisting>
+CREATE SUBSCRIPTION mysub
+         CONNECTION 'host=192.168.1.50 port=5432 user=foo dbname=foodb password=foopass'
+        PUBLICATION insert_only
+               WITH (DISABLED);
+</programlisting>
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   <command>CREATE SUBSCRIPTION</command> is a <productname>PostgreSQL</>
+   extension.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-altersubscription"></member>
+   <member><xref linkend="sql-dropsubscription"></member>
+   <member><xref linkend="sql-createpublication"></member>
+   <member><xref linkend="sql-alterpublication"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
diff --git a/doc/src/sgml/ref/drop_publication.sgml b/doc/src/sgml/ref/drop_publication.sgml
new file mode 100644 (file)
index 0000000..1a1be57
--- /dev/null
@@ -0,0 +1,107 @@
+<!--
+doc/src/sgml/ref/drop_publication.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-DROPPUBLICATION">
+ <indexterm zone="sql-droppublication">
+  <primary>DROP PUBLICATION</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>DROP PUBLICATION</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>DROP PUBLICATION</refname>
+  <refpurpose>remove a publication</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+DROP PUBLICATION [ IF EXISTS ] <replaceable class="PARAMETER">name</replaceable> [, ...] [ CASCADE | RESTRICT ]
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>DROP PUBLICATION</command> removes an existing publication from
+   the database.
+  </para>
+
+  <para>
+   A publication can only be dropped by its owner or a superuser.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><literal>IF EXISTS</literal></term>
+    <listitem>
+     <para>
+      Do not throw an error if the publication does not exist. A notice is issued
+      in this case.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">name</replaceable></term>
+    <listitem>
+     <para>
+      The name of an existing publication.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>CASCADE</literal></term>
+    <term><literal>RESTRICT</literal></term>
+
+    <listitem>
+     <para>
+      These key words do not have any effect, since there are no dependencies
+      on publications.
+     </para>
+    </listitem>
+   </varlistentry>
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   Drop a publication:
+<programlisting>
+DROP PUBLICATION mypublication;
+</programlisting>
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   <command>DROP PUBLICATION</command> is a <productname>PostgreSQL</>
+   extension.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-createpublication"></member>
+   <member><xref linkend="sql-alterpublication"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
diff --git a/doc/src/sgml/ref/drop_subscription.sgml b/doc/src/sgml/ref/drop_subscription.sgml
new file mode 100644 (file)
index 0000000..9f2fb93
--- /dev/null
@@ -0,0 +1,110 @@
+<!--
+doc/src/sgml/ref/drop_subscription.sgml
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-DROPSUBSCRIPTION">
+ <indexterm zone="sql-dropsubscription">
+  <primary>DROP SUBSCRIPTION</primary>
+ </indexterm>
+
+ <refmeta>
+  <refentrytitle>DROP SUBSCRIPTION</refentrytitle>
+  <manvolnum>7</manvolnum>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+  <refname>DROP SUBSCRIPTION</refname>
+  <refpurpose>remove a subscription</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+<synopsis>
+DROP SUBSCRIPTION [ IF EXISTS ] <replaceable class="parameter">name</replaceable> [ DROP SLOT | NODROP SLOT ]
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+  <title>Description</title>
+
+  <para>
+   <command>DROP SUBSCRIPTION</command> removes a subscription from the
+   database cluster.
+  </para>
+
+  <para>
+   A subscription can only be dropped by a superuser.
+  </para>
+
+  <para>
+   The replication worker associated with the subscription will not stop until
+   after the transaction that issued this command has committed.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>Parameters</title>
+
+  <variablelist>
+   <varlistentry>
+    <term><replaceable class="parameter">name</replaceable></term>
+    <listitem>
+     <para>
+      The name of a subscription to be dropped.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>DROP SLOT</literal></term>
+    <term><literal>NODROP SLOT</literal></term>
+    <listitem>
+     <para>
+      Specifies whether to drop the replication slot on the publisher.  The
+      default is
+      <literal>DROP SLOT</literal>.
+     </para>
+
+     <para>
+      If the publisher is not reachable when the subscription is to be
+      dropped, then it is useful to specify <literal>NODROP SLOT</literal>.
+      But the replication slot on the publisher will then have to be removed
+      manually.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+ </refsect1>
+
+ <refsect1>
+  <title>Examples</title>
+
+  <para>
+   Drop a subscription:
+<programlisting>
+DROP SUBSCRIPTION mysub;
+</programlisting>
+  </para>
+
+ </refsect1>
+
+ <refsect1>
+  <title>Compatibility</title>
+
+  <para>
+   <command>DROP SUBSCRIPTION</command> is a <productname>PostgreSQL</>
+   extension.
+  </para>
+ </refsect1>
+
+ <refsect1>
+  <title>See Also</title>
+
+  <simplelist type="inline">
+   <member><xref linkend="sql-createsubscription"></member>
+   <member><xref linkend="sql-altersubscription"></member>
+  </simplelist>
+ </refsect1>
+</refentry>
index b70e7d57e95725622ed7124eaddb89b705bf04c3..a1e03c481d1107891c4d80654fa697fb80920976 100644 (file)
@@ -755,6 +755,15 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--include-subscriptions</option></term>
+      <listitem>
+       <para>
+        Include logical replication subscriptions in the dump.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--inserts</option></term>
       <listitem>
@@ -789,6 +798,18 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--no-create-subscription-slots</option></term>
+      <listitem>
+       <para>
+        When dumping logical replication subscriptions,
+        generate <command>CREATE SUBSCRIPTION</command> commands that do not
+        create the remote replication slot.  That way, the dump can be
+        restored without requiring network access to the remote servers.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>--no-security-labels</option></term>
       <listitem>
index 991573121bec6c68fe25be5e185eb2509239155d..640fe12bbf638dc76f996f5ab26ea3eeae9ad9ba 100644 (file)
@@ -1600,6 +1600,34 @@ testdb=&gt;
         </listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><literal>\dRp[+] [ <link linkend="APP-PSQL-patterns"><replaceable class="parameter">pattern</replaceable></link> ]</literal></term>
+        <listitem>
+        <para>
+        Lists replication publications.
+        If <replaceable class="parameter">pattern</replaceable> is
+        specified, only those publications whose names match the pattern are
+        listed.
+        If <literal>+</literal> is appended to the command name, the tables
+        associated with each publication are shown as well.
+        </para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><literal>\dRs[+] [ <link linkend="APP-PSQL-patterns"><replaceable class="parameter">pattern</replaceable></link> ]</literal></term>
+        <listitem>
+        <para>
+        Lists replication subscriptions.
+        If <replaceable class="parameter">pattern</replaceable> is
+        specified, only those subscriptions whose names match the pattern are
+        listed.
+        If <literal>+</literal> is appended to the command name, additional
+        properties of the subscriptions are shown.
+        </para>
+        </listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><literal>\dT[S+] [ <link linkend="APP-PSQL-patterns"><replaceable class="parameter">pattern</replaceable></link> ]</literal></term>
         <listitem>
index 8acdff1393fd2d1460f5e707c8ada4840bdf8620..34007d3508def2de4405460abcaa3211df16f1b1 100644 (file)
    &alterOperatorClass;
    &alterOperatorFamily;
    &alterPolicy;
+   &alterPublication;
    &alterRole;
    &alterRule;
    &alterSchema;
    &alterSequence;
    &alterServer;
+   &alterSubscription;
    &alterSystem;
    &alterTable;
    &alterTableSpace;
    &createOperatorClass;
    &createOperatorFamily;
    &createPolicy;
+   &createPublication;
    &createRole;
    &createRule;
    &createSchema;
    &createSequence;
    &createServer;
+   &createSubscription;
    &createTable;
    &createTableAs;
    &createTableSpace;
    &dropOperatorFamily;
    &dropOwned;
    &dropPolicy;
+   &dropPublication;
    &dropRole;
    &dropRule;
    &dropSchema;
    &dropSequence;
    &dropServer;
+   &dropSubscription;
    &dropTable;
    &dropTableSpace;
    &dropTSConfig;
index 977f80b469bd6553f9db0f3b3a4e254e687bcbbf..b490c071382958e8f390872b99b302fe91c08e0b 100644 (file)
@@ -22,6 +22,7 @@ SUBDIRS = \
        include \
        interfaces \
        backend/replication/libpqwalreceiver \
+       backend/replication/pgoutput \
        fe_utils \
        bin \
        pl \
index f5346f024efbe80e56c76f0d2b114c05439e31da..f6f136da3ab3fff5a296f2f41d8c23487fb68ab9 100644 (file)
@@ -42,6 +42,7 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "replication/logical.h"
+#include "replication/logicallauncher.h"
 #include "replication/origin.h"
 #include "replication/syncrep.h"
 #include "replication/walsender.h"
@@ -2135,6 +2136,7 @@ CommitTransaction(void)
        AtEOXact_HashTables(true);
        AtEOXact_PgStat(true);
        AtEOXact_Snapshot(true);
+       AtCommit_ApplyLauncher();
        pgstat_report_xact_timestamp(0);
 
        CurrentResourceOwner = NULL;
index cd38c8ab3ff444a7406583267f8643e7760d81a4..31368585d212c8472891b41edc8e45f632326b4f 100644 (file)
@@ -14,8 +14,9 @@ OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \
        objectaccess.o objectaddress.o partition.o pg_aggregate.o pg_collation.o \
        pg_constraint.o pg_conversion.o \
        pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \
-       pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \
-       pg_type.o storage.o toasting.o
+       pg_operator.o pg_proc.o pg_publication.o pg_range.o \
+          pg_db_role_setting.o pg_shdepend.o pg_subscription.o pg_type.o \
+          storage.o toasting.o
 
 BKIFILES = postgres.bki postgres.description postgres.shdescription
 
@@ -42,7 +43,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
        pg_foreign_table.h pg_policy.h pg_replication_origin.h \
        pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \
        pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \
-       pg_sequence.h \
+       pg_sequence.h pg_publication.h pg_publication_rel.h pg_subscription.h \
        toasting.h indexing.h \
     )
 
index 640632784c1f5044f6dd384c8631f6fb23423f6c..a96bf692dfdde8e6d5c7d77c9ac61d469fe22345 100644 (file)
@@ -45,6 +45,7 @@
 #include "catalog/pg_operator.h"
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_type.h"
 #include "catalog/pg_ts_config.h"
@@ -3390,6 +3391,10 @@ static const char *const not_owner_msg[MAX_ACL_KIND] =
        gettext_noop("must be owner of event trigger %s"),
        /* ACL_KIND_EXTENSION */
        gettext_noop("must be owner of extension %s"),
+       /* ACL_KIND_PUBLICATION */
+       gettext_noop("must be owner of publication %s"),
+       /* ACL_KIND_SUBSCRIPTION */
+       gettext_noop("must be owner of subscription %s"),
 };
 
 
@@ -5071,6 +5076,58 @@ pg_extension_ownercheck(Oid ext_oid, Oid roleid)
        return has_privs_of_role(roleid, ownerId);
 }
 
+/*
+ * Ownership check for a publication (specified by OID).
+ */
+bool
+pg_publication_ownercheck(Oid pub_oid, Oid roleid)
+{
+       HeapTuple       tuple;
+       Oid                     ownerId;
+
+       /* Superusers bypass all permission checking. */
+       if (superuser_arg(roleid))
+               return true;
+
+       tuple = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pub_oid));
+       if (!HeapTupleIsValid(tuple))
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("publication with OID %u does not exist", pub_oid)));
+
+       ownerId = ((Form_pg_publication) GETSTRUCT(tuple))->pubowner;
+
+       ReleaseSysCache(tuple);
+
+       return has_privs_of_role(roleid, ownerId);
+}
+
+/*
+ * Ownership check for a subscription (specified by OID).
+ */
+bool
+pg_subscription_ownercheck(Oid sub_oid, Oid roleid)
+{
+       HeapTuple       tuple;
+       Oid                     ownerId;
+
+       /* Superusers bypass all permission checking. */
+       if (superuser_arg(roleid))
+               return true;
+
+       tuple = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(sub_oid));
+       if (!HeapTupleIsValid(tuple))
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("subscription with OID %u does not exist", sub_oid)));
+
+       ownerId = ((Form_pg_subscription) GETSTRUCT(tuple))->subowner;
+
+       ReleaseSysCache(tuple);
+
+       return has_privs_of_role(roleid, ownerId);
+}
+
 /*
  * Check whether specified role has CREATEROLE privilege (or is a superuser)
  *
index c3b4d298ce29c25ea3dd100b1d3466db2ca28537..11ee536726ce08bf22b87bc8a087e2f73869e821 100644 (file)
@@ -36,6 +36,7 @@
 #include "catalog/pg_shdepend.h"
 #include "catalog/pg_shdescription.h"
 #include "catalog/pg_shseclabel.h"
+#include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/toasting.h"
 #include "miscadmin.h"
@@ -227,7 +228,8 @@ IsSharedRelation(Oid relationId)
                relationId == SharedSecLabelRelationId ||
                relationId == TableSpaceRelationId ||
                relationId == DbRoleSettingRelationId ||
-               relationId == ReplicationOriginRelationId)
+               relationId == ReplicationOriginRelationId ||
+               relationId == SubscriptionRelationId)
                return true;
        /* These are their indexes (see indexing.h) */
        if (relationId == AuthIdRolnameIndexId ||
@@ -245,7 +247,9 @@ IsSharedRelation(Oid relationId)
                relationId == TablespaceNameIndexId ||
                relationId == DbRoleSettingDatidRolidIndexId ||
                relationId == ReplicationOriginIdentIndex ||
-               relationId == ReplicationOriginNameIndex)
+               relationId == ReplicationOriginNameIndex ||
+               relationId == SubscriptionObjectIndexId ||
+               relationId == SubscriptionNameIndexId)
                return true;
        /* These are their toast tables and toast indexes (see toasting.h) */
        if (relationId == PgShdescriptionToastTable ||
index 359719e45047ac5ebcffa19aac47ec8fa4d0c7c0..1c43af6effb91aba6a64e3c5ad2f8af91b3bdf82 100644 (file)
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_policy.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
 #include "catalog/pg_rewrite.h"
+#include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_transform.h"
 #include "catalog/pg_trigger.h"
@@ -64,6 +67,7 @@
 #include "commands/extension.h"
 #include "commands/policy.h"
 #include "commands/proclang.h"
+#include "commands/publicationcmds.h"
 #include "commands/schemacmds.h"
 #include "commands/seclabel.h"
 #include "commands/sequence.h"
@@ -164,6 +168,9 @@ static const Oid object_classes[] = {
        ExtensionRelationId,            /* OCLASS_EXTENSION */
        EventTriggerRelationId,         /* OCLASS_EVENT_TRIGGER */
        PolicyRelationId,                       /* OCLASS_POLICY */
+       PublicationRelationId,          /* OCLASS_PUBLICATION */
+       PublicationRelRelationId,       /* OCLASS_PUBLICATION_REL */
+       SubscriptionRelationId,         /* OCLASS_SUBSCRIPTION */
        TransformRelationId                     /* OCLASS_TRANSFORM */
 };
 
@@ -1244,6 +1251,14 @@ doDeletion(const ObjectAddress *object, int flags)
                        RemovePolicyById(object->objectId);
                        break;
 
+               case OCLASS_PUBLICATION:
+                       RemovePublicationById(object->objectId);
+                       break;
+
+               case OCLASS_PUBLICATION_REL:
+                       RemovePublicationRelById(object->objectId);
+                       break;
+
                case OCLASS_TRANSFORM:
                        DropTransformById(object->objectId);
                        break;
@@ -2404,6 +2419,15 @@ getObjectClass(const ObjectAddress *object)
                case PolicyRelationId:
                        return OCLASS_POLICY;
 
+               case PublicationRelationId:
+                       return OCLASS_PUBLICATION;
+
+               case PublicationRelRelationId:
+                       return OCLASS_PUBLICATION_REL;
+
+               case SubscriptionRelationId:
+                       return OCLASS_SUBSCRIPTION;
+
                case TransformRelationId:
                        return OCLASS_TRANSFORM;
        }
index 2b1808b0f92651ca5f3165e830cd35e542d0b7f5..44d14ae2b18a97e79b4ef38bdb53cfb48c0ff297 100644 (file)
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_policy.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
 #include "catalog/pg_rewrite.h"
+#include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_transform.h"
 #include "catalog/pg_trigger.h"
@@ -450,6 +453,30 @@ static const ObjectPropertyType ObjectProperty[] =
                Anum_pg_type_typacl,
                ACL_KIND_TYPE,
                true
+       },
+       {
+               PublicationRelationId,
+               PublicationObjectIndexId,
+               PUBLICATIONOID,
+               PUBLICATIONNAME,
+               Anum_pg_publication_pubname,
+               InvalidAttrNumber,
+               Anum_pg_publication_pubowner,
+               InvalidAttrNumber,
+               -1,
+               true
+       },
+       {
+               SubscriptionRelationId,
+               SubscriptionObjectIndexId,
+               SUBSCRIPTIONOID,
+               SUBSCRIPTIONNAME,
+               Anum_pg_subscription_subname,
+               InvalidAttrNumber,
+               Anum_pg_subscription_subowner,
+               InvalidAttrNumber,
+               -1,
+               true
        }
 };
 
@@ -653,6 +680,18 @@ static const struct object_type_map
        {
                "policy", OBJECT_POLICY
        },
+       /* OCLASS_PUBLICATION */
+       {
+               "publication", OBJECT_PUBLICATION
+       },
+       /* OCLASS_PUBLICATION_REL */
+       {
+               "publication relation", OBJECT_PUBLICATION_REL
+       },
+       /* OCLASS_SUBSCRIPTION */
+       {
+               "subscription", OBJECT_SUBSCRIPTION
+       },
        /* OCLASS_TRANSFORM */
        {
                "transform", OBJECT_TRANSFORM
@@ -688,6 +727,9 @@ static ObjectAddress get_object_address_opf_member(ObjectType objtype,
 
 static ObjectAddress get_object_address_usermapping(List *objname,
                                                           List *objargs, bool missing_ok);
+static ObjectAddress get_object_address_publication_rel(List *objname,
+                                                                  List *objargs, Relation *relation,
+                                                                  bool missing_ok);
 static ObjectAddress get_object_address_defacl(List *objname, List *objargs,
                                                  bool missing_ok);
 static const ObjectPropertyType *get_object_property_data(Oid class_id);
@@ -812,6 +854,8 @@ get_object_address(ObjectType objtype, List *objname, List *objargs,
                        case OBJECT_FOREIGN_SERVER:
                        case OBJECT_EVENT_TRIGGER:
                        case OBJECT_ACCESS_METHOD:
+                       case OBJECT_PUBLICATION:
+                       case OBJECT_SUBSCRIPTION:
                                address = get_object_address_unqualified(objtype,
                                                                                                                 objname, missing_ok);
                                break;
@@ -926,6 +970,10 @@ get_object_address(ObjectType objtype, List *objname, List *objargs,
                                address = get_object_address_usermapping(objname, objargs,
                                                                                                                 missing_ok);
                                break;
+                       case OBJECT_PUBLICATION_REL:
+                               address = get_object_address_publication_rel(objname, objargs,
+                                                                                                                        &relation,
+                                                                                                                        missing_ok);
+                               break;
                        case OBJECT_DEFACL:
                                address = get_object_address_defacl(objname, objargs,
                                                                                                        missing_ok);
@@ -1091,6 +1139,12 @@ get_object_address_unqualified(ObjectType objtype,
                        case OBJECT_EVENT_TRIGGER:
                                msg = gettext_noop("event trigger name cannot be qualified");
                                break;
+                       case OBJECT_PUBLICATION:
+                               msg = gettext_noop("publication name cannot be qualified");
+                               break;
+                       case OBJECT_SUBSCRIPTION:
+                               msg = gettext_noop("subscription name cannot be qualified");
+                               break;
                        default:
                                elog(ERROR, "unrecognized objtype: %d", (int) objtype);
                                msg = NULL;             /* placate compiler */
@@ -1156,6 +1210,16 @@ get_object_address_unqualified(ObjectType objtype,
                        address.objectId = get_event_trigger_oid(name, missing_ok);
                        address.objectSubId = 0;
                        break;
+               case OBJECT_PUBLICATION:
+                       address.classId = PublicationRelationId;
+                       address.objectId = get_publication_oid(name, missing_ok);
+                       address.objectSubId = 0;
+                       break;
+               case OBJECT_SUBSCRIPTION:
+                       address.classId = SubscriptionRelationId;
+                       address.objectId = get_subscription_oid(name, missing_ok);
+                       address.objectSubId = 0;
+                       break;
                default:
                        elog(ERROR, "unrecognized objtype: %d", (int) objtype);
                        /* placate compiler, which doesn't know elog won't return */
@@ -1743,6 +1807,51 @@ get_object_address_usermapping(List *objname, List *objargs, bool missing_ok)
        return address;
 }
 
+/*
+ * Find the ObjectAddress for a publication relation.  The objname parameter
+ * is the relation name; objargs contains the publication name.
+ *
+ * On success, the relation is handed back in *relation, still open with
+ * AccessShareLock, and the returned address identifies the mapping row.
+ *
+ * If the relation, the publication, or the mapping between them cannot be
+ * found and missing_ok is true, the returned address has an invalid objectId,
+ * *relation is set to NULL, and any relation opened here is closed again.
+ * With missing_ok false, a missing publication or mapping raises an error.
+ */
+static ObjectAddress
+get_object_address_publication_rel(List *objname, List *objargs,
+                                                                  Relation *relation, bool missing_ok)
+{
+       ObjectAddress address;
+       Relation        rel;
+       char       *pubname;
+       Publication *pub;
+
+       ObjectAddressSet(address, PublicationRelRelationId, InvalidOid);
+
+       /* Only hand a relation back to the caller once the lookup succeeded. */
+       *relation = NULL;
+
+       rel = relation_openrv_extended(makeRangeVarFromNameList(objname),
+                                                                  AccessShareLock, missing_ok);
+       /* Test the opened relation itself, not the caller's output pointer. */
+       if (!rel)
+               return address;
+
+       /* fetch publication name from input list */
+       pubname = strVal(linitial(objargs));
+
+       /* Now look up the pg_publication tuple */
+       pub = GetPublicationByName(pubname, missing_ok);
+       if (!pub)
+       {
+               /* Don't leave the relation open when reporting "not found". */
+               relation_close(rel, AccessShareLock);
+               return address;
+       }
+
+       /* Find the publication relation mapping in syscache. */
+       address.objectId =
+               GetSysCacheOid2(PUBLICATIONRELMAP,
+                                               ObjectIdGetDatum(RelationGetRelid(rel)),
+                                               ObjectIdGetDatum(pub->oid));
+       if (!OidIsValid(address.objectId))
+       {
+               if (!missing_ok)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                        errmsg("publication relation \"%s\" in publication \"%s\" does not exist",
+                                                       RelationGetRelationName(rel), pubname)));
+               relation_close(rel, AccessShareLock);
+               return address;
+       }
+
+       *relation = rel;
+       return address;
+}
+
 /*
  * Find the ObjectAddress for a default ACL.
  */
@@ -2002,6 +2111,7 @@ pg_get_object_address(PG_FUNCTION_ARGS)
                case OBJECT_DOMCONSTRAINT:
                case OBJECT_CAST:
                case OBJECT_USER_MAPPING:
+               case OBJECT_PUBLICATION_REL:
                case OBJECT_DEFACL:
                case OBJECT_TRANSFORM:
                        if (list_length(args) != 1)
@@ -2183,6 +2293,16 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address,
                                                                        format_type_be(targettypeid))));
                        }
                        break;
+               case OBJECT_PUBLICATION:
+                       if (!pg_publication_ownercheck(address.objectId, roleid))
+                               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION,
+                                                          NameListToString(objname));
+                       break;
+               case OBJECT_SUBSCRIPTION:
+                       if (!pg_subscription_ownercheck(address.objectId, roleid))
+                               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION,
+                                                          NameListToString(objname));
+                       break;
                case OBJECT_TRANSFORM:
                        {
                                TypeName   *typename = (TypeName *) linitial(objname);
@@ -3191,6 +3311,41 @@ getObjectDescription(const ObjectAddress *object)
                                break;
                        }
 
+               case OCLASS_PUBLICATION:
+                       {
+                               appendStringInfo(&buffer, _("publication %s"),
+                                                                get_publication_name(object->objectId));
+                               break;
+                       }
+
+               case OCLASS_PUBLICATION_REL:
+                       {
+                               HeapTuple       tup;
+                               char       *pubname;
+                               Form_pg_publication_rel prform;
+
+                               tup = SearchSysCache1(PUBLICATIONREL,
+                                                                         ObjectIdGetDatum(object->objectId));
+                               if (!HeapTupleIsValid(tup))
+                                       elog(ERROR, "cache lookup failed for publication table %u",
+                                                object->objectId);
+
+                               prform = (Form_pg_publication_rel) GETSTRUCT(tup);
+                               pubname = get_publication_name(prform->prpubid);
+
+                               appendStringInfo(&buffer, _("publication table %s in publication %s"),
+                                                                get_rel_name(prform->prrelid), pubname);
+                               ReleaseSysCache(tup);
+                               break;
+                       }
+
+               case OCLASS_SUBSCRIPTION:
+                       {
+                               appendStringInfo(&buffer, _("subscription %s"),
+                                                                get_subscription_name(object->objectId));
+                               break;
+                       }
+
                default:
                        appendStringInfo(&buffer, "unrecognized object %u %u %d",
                                                         object->classId,
@@ -3677,6 +3832,18 @@ getObjectTypeDescription(const ObjectAddress *object)
                        appendStringInfoString(&buffer, "access method");
                        break;
 
+               case OCLASS_PUBLICATION:
+                       appendStringInfoString(&buffer, "publication");
+                       break;
+
+               case OCLASS_PUBLICATION_REL:
+                       /* must match the "publication relation" entry in object_type_map */
+                       appendStringInfoString(&buffer, "publication relation");
+                       break;
+
+               case OCLASS_SUBSCRIPTION:
+                       appendStringInfoString(&buffer, "subscription");
+                       break;
+
                default:
                        appendStringInfo(&buffer, "unrecognized %u", object->classId);
                        break;
@@ -4648,6 +4815,58 @@ getObjectIdentityParts(const ObjectAddress *object,
                        }
                        break;
 
+               case OCLASS_PUBLICATION:
+                       {
+                               char       *pubname;
+
+                               pubname = get_publication_name(object->objectId);
+                               appendStringInfoString(&buffer,
+                                                                          quote_identifier(pubname));
+                               if (objname)
+                                       *objname = list_make1(pubname);
+                               break;
+                       }
+
+               case OCLASS_PUBLICATION_REL:
+                       {
+                               HeapTuple       tup;
+                               char       *pubname;
+                               Form_pg_publication_rel prform;
+
+                               tup = SearchSysCache1(PUBLICATIONREL,
+                                                                         ObjectIdGetDatum(object->objectId));
+                               if (!HeapTupleIsValid(tup))
+                                       elog(ERROR, "cache lookup failed for publication table %u",
+                                                object->objectId);
+
+                               prform = (Form_pg_publication_rel) GETSTRUCT(tup);
+                               pubname = get_publication_name(prform->prpubid);
+
+                               appendStringInfo(&buffer, _("publication table %s in publication %s"),
+                                                                get_rel_name(prform->prrelid), pubname);
+
+                               if (objname)
+                               {
+                                       getRelationIdentity(&buffer, prform->prrelid, objname);
+                                       *objargs = list_make1(pubname);
+                               }
+
+                               ReleaseSysCache(tup);
+                               break;
+                       }
+
+               case OCLASS_SUBSCRIPTION:
+                       {
+                               char       *subname;
+
+                               subname = get_subscription_name(object->objectId);
+                               appendStringInfoString(&buffer,
+                                                                          quote_identifier(subname));
+                               if (objname)
+                                       *objname = list_make1(subname);
+                               break;
+                       }
+
                default:
                        appendStringInfo(&buffer, "unrecognized object %u %u %d",
                                                         object->classId,
diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
new file mode 100644 (file)
index 0000000..576b7fa
--- /dev/null
@@ -0,0 +1,457 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_publication.c
+ *             publication C API manipulation
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             src/backend/catalog/pg_publication.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/xact.h"
+
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/index.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/objectaddress.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
+
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+/*
+ * Verify that a relation is eligible for membership in a publication.
+ *
+ * Raises an error describing the problem if the relation is not a plain
+ * table, is a system catalog, or does not need WAL.
+ */
+static void
+check_publication_add_relation(Relation targetrel)
+{
+       char       *relname = RelationGetRelationName(targetrel);
+
+       /* Only plain tables qualify. */
+       if (RelationGetForm(targetrel)->relkind != RELKIND_RELATION)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("\"%s\" is not a table", relname),
+                                errdetail("Only tables can be added to publications.")));
+
+       /* System catalogs are rejected. */
+       if (IsCatalogRelation(targetrel))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("\"%s\" is a system table", relname),
+                                errdetail("System tables cannot be added to publications.")));
+
+       /* Relations that do not need WAL (temporary, unlogged) are rejected. */
+       if (!RelationNeedsWAL(targetrel))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("table \"%s\" cannot be replicated", relname),
+                                errdetail("Temporary and unlogged relations cannot be replicated.")));
+}
+
+/*
+ * Report whether the relation described by relid and its pg_class row can
+ * be published.
+ *
+ * This works from the catalog row alone, so the relation need not be opened,
+ * and it never raises an error; it simply returns false for relations that
+ * do not qualify.
+ */
+static bool
+is_publishable_class(Oid relid, Form_pg_class reltuple)
+{
+       /* Only plain tables qualify. */
+       if (reltuple->relkind != RELKIND_RELATION)
+               return false;
+
+       /* System catalogs do not qualify. */
+       if (IsCatalogClass(relid, reltuple))
+               return false;
+
+       /* Only permanent relations qualify. */
+       if (reltuple->relpersistence != RELPERSISTENCE_PERMANENT)
+               return false;
+
+       /*
+        * Also exclude any tables created as part of initdb. This mainly
+        * affects the preinstalled information_schema.
+        * Note that IsCatalogClass() only checks for these inside pg_catalog
+        * and toast schemas.
+        */
+       return relid >= FirstNormalObjectId;
+}
+
+/*
+ * Record that a relation belongs to a publication by inserting a row into
+ * pg_publication_rel.
+ *
+ * Returns the address of the new mapping row.  If the mapping already
+ * exists, returns InvalidObjectAddress when if_not_exists is true and raises
+ * an error otherwise.
+ */
+ObjectAddress
+publication_add_relation(Oid pubid, Relation targetrel,
+                                                bool if_not_exists)
+{
+       Oid                     relid = RelationGetRelid(targetrel);
+       Publication *pub = GetPublication(pubid);
+       Relation        pubrelcat;
+       HeapTuple       newtup;
+       Oid                     mapoid;
+       Datum           values[Natts_pg_publication_rel];
+       bool            nulls[Natts_pg_publication_rel];
+       ObjectAddress   myself;
+       ObjectAddress   referenced;
+
+       pubrelcat = heap_open(PublicationRelRelationId, RowExclusiveLock);
+
+       /*
+        * Look for an existing mapping first.  This is only here to give a
+        * friendlier error in the common case; it does not really prevent
+        * duplicates.  The real protection is the unique key on the catalog.
+        */
+       if (SearchSysCacheExists2(PUBLICATIONRELMAP, ObjectIdGetDatum(relid),
+                                                         ObjectIdGetDatum(pubid)))
+       {
+               heap_close(pubrelcat, RowExclusiveLock);
+
+               if (!if_not_exists)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DUPLICATE_OBJECT),
+                                        errmsg("relation \"%s\" is already member of publication \"%s\"",
+                                                       RelationGetRelationName(targetrel), pub->name)));
+
+               return InvalidObjectAddress;
+       }
+
+       check_publication_add_relation(targetrel);
+
+       /* Build the new catalog row. */
+       memset(values, 0, sizeof(values));
+       memset(nulls, false, sizeof(nulls));
+
+       values[Anum_pg_publication_rel_prpubid - 1] = ObjectIdGetDatum(pubid);
+       values[Anum_pg_publication_rel_prrelid - 1] = ObjectIdGetDatum(relid);
+
+       newtup = heap_form_tuple(RelationGetDescr(pubrelcat), values, nulls);
+
+       /* Store it and keep the catalog indexes up to date. */
+       mapoid = simple_heap_insert(pubrelcat, newtup);
+       CatalogUpdateIndexes(pubrelcat, newtup);
+       heap_freetuple(newtup);
+
+       ObjectAddressSet(myself, PublicationRelRelationId, mapoid);
+
+       /* The mapping depends on the publication ... */
+       ObjectAddressSet(referenced, PublicationRelationId, pubid);
+       recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
+
+       /* ... and on the relation. */
+       ObjectAddressSet(referenced, RelationRelationId, relid);
+       recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
+
+       heap_close(pubrelcat, RowExclusiveLock);
+
+       /* Invalidate relcache so that publication info is rebuilt. */
+       CacheInvalidateRelcache(targetrel);
+
+       return myself;
+}
+
+
+/*
+ * Build the list of OIDs of publications that a relation has been
+ * explicitly added to, taken from the PUBLICATIONRELMAP syscache.
+ */
+List *
+GetRelationPublications(Oid relid)
+{
+       List               *pubids = NIL;
+       CatCList           *members;
+       int                             idx;
+
+       /* Fetch every pg_publication_rel entry for this relation. */
+       members = SearchSysCacheList1(PUBLICATIONRELMAP,
+                                                                 ObjectIdGetDatum(relid));
+
+       for (idx = 0; idx < members->n_members; idx++)
+       {
+               HeapTuple       maptup = &members->members[idx]->tuple;
+               Form_pg_publication_rel mapform;
+
+               mapform = (Form_pg_publication_rel) GETSTRUCT(maptup);
+               pubids = lappend_oid(pubids, mapform->prpubid);
+       }
+
+       ReleaseSysCacheList(members);
+
+       return pubids;
+}
+
+/*
+ * Build the list of OIDs of relations that were explicitly added to a
+ * publication.
+ *
+ * This should only be used for normal publications, the FOR ALL TABLES
+ * should use GetAllTablesPublicationRelations().
+ */
+List *
+GetPublicationRelations(Oid pubid)
+{
+       List               *relids = NIL;
+       Relation                mapcat;
+       ScanKeyData             key;
+       SysScanDesc             mapscan;
+       HeapTuple               maptup;
+
+       /* Find all relations associated with the publication. */
+       mapcat = heap_open(PublicationRelRelationId, AccessShareLock);
+
+       ScanKeyInit(&key,
+                               Anum_pg_publication_rel_prpubid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(pubid));
+
+       mapscan = systable_beginscan(mapcat, PublicationRelMapIndexId, true,
+                                                                NULL, 1, &key);
+
+       for (maptup = systable_getnext(mapscan);
+                HeapTupleIsValid(maptup);
+                maptup = systable_getnext(mapscan))
+       {
+               Form_pg_publication_rel mapform;
+
+               mapform = (Form_pg_publication_rel) GETSTRUCT(maptup);
+               relids = lappend_oid(relids, mapform->prrelid);
+       }
+
+       systable_endscan(mapscan);
+       heap_close(mapcat, AccessShareLock);
+
+       return relids;
+}
+
+/*
+ * Build the list of OIDs of all publications whose puballtables flag is
+ * set, i.e. those marked as FOR ALL TABLES.
+ */
+List *
+GetAllTablesPublications(void)
+{
+       List               *pubids = NIL;
+       Relation                pubcat;
+       ScanKeyData             key;
+       SysScanDesc             pubscan;
+       HeapTuple               pubtup;
+
+       pubcat = heap_open(PublicationRelationId, AccessShareLock);
+
+       /* Match only rows with puballtables = true. */
+       ScanKeyInit(&key,
+                               Anum_pg_publication_puballtables,
+                               BTEqualStrategyNumber, F_BOOLEQ,
+                               BoolGetDatum(true));
+
+       /* No index is used for this scan. */
+       pubscan = systable_beginscan(pubcat, InvalidOid, false,
+                                                                NULL, 1, &key);
+
+       for (pubtup = systable_getnext(pubscan);
+                HeapTupleIsValid(pubtup);
+                pubtup = systable_getnext(pubscan))
+       {
+               pubids = lappend_oid(pubids, HeapTupleGetOid(pubtup));
+       }
+
+       systable_endscan(pubscan);
+       heap_close(pubcat, AccessShareLock);
+
+       return pubids;
+}
+
+/*
+ * Build the list of OIDs of every relation that a FOR ALL TABLES
+ * publication publishes: all pg_class entries of kind RELKIND_RELATION that
+ * pass is_publishable_class().
+ */
+List *
+GetAllTablesPublicationRelations(void)
+{
+       List       *relids = NIL;
+       Relation        pgclass;
+       ScanKeyData relkindkey;
+       HeapScanDesc classscan;
+       HeapTuple       classtup;
+
+       pgclass = heap_open(RelationRelationId, AccessShareLock);
+
+       /* Restrict the scan to plain tables. */
+       ScanKeyInit(&relkindkey,
+                               Anum_pg_class_relkind,
+                               BTEqualStrategyNumber, F_CHAREQ,
+                               CharGetDatum(RELKIND_RELATION));
+
+       classscan = heap_beginscan_catalog(pgclass, 1, &relkindkey);
+
+       for (classtup = heap_getnext(classscan, ForwardScanDirection);
+                classtup != NULL;
+                classtup = heap_getnext(classscan, ForwardScanDirection))
+       {
+               Oid                             relid = HeapTupleGetOid(classtup);
+               Form_pg_class   classform = (Form_pg_class) GETSTRUCT(classtup);
+
+               if (!is_publishable_class(relid, classform))
+                       continue;
+
+               relids = lappend_oid(relids, relid);
+       }
+
+       heap_endscan(classscan);
+       heap_close(pgclass, AccessShareLock);
+
+       return relids;
+}
+
+/*
+ * Load a publication by OID.
+ *
+ * The returned Publication struct and the name string it points to are
+ * palloc'd here.  Raises an error if no publication has the given OID.
+ */
+Publication *
+GetPublication(Oid pubid)
+{
+       HeapTuple               pubtup;
+       Form_pg_publication     form;
+       Publication        *result;
+
+       pubtup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+       if (!HeapTupleIsValid(pubtup))
+               elog(ERROR, "cache lookup failed for publication %u", pubid);
+
+       form = (Form_pg_publication) GETSTRUCT(pubtup);
+
+       /* Copy what we need out of the cache entry before releasing it. */
+       result = (Publication *) palloc(sizeof(Publication));
+       result->oid = pubid;
+       result->name = pstrdup(NameStr(form->pubname));
+       result->alltables = form->puballtables;
+       result->pubactions.pubinsert = form->pubinsert;
+       result->pubactions.pubupdate = form->pubupdate;
+       result->pubactions.pubdelete = form->pubdelete;
+
+       ReleaseSysCache(pubtup);
+
+       return result;
+}
+
+
+
+/*
+ * Load a publication by name.
+ *
+ * If no publication has that name, returns NULL when missing_ok is true and
+ * raises an error otherwise.
+ */
+Publication *
+GetPublicationByName(const char *pubname, bool missing_ok)
+{
+       Oid                     pubid;
+
+       pubid = GetSysCacheOid1(PUBLICATIONNAME, CStringGetDatum(pubname));
+       if (OidIsValid(pubid))
+               return GetPublication(pubid);
+
+       if (!missing_ok)
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("publication \"%s\" does not exist", pubname)));
+
+       return NULL;
+}
+
+/*
+ * get_publication_oid - map a publication name to its OID
+ *
+ * When the name is not found, raises an error if missing_ok is false and
+ * returns InvalidOid if it is true.
+ */
+Oid
+get_publication_oid(const char *pubname, bool missing_ok)
+{
+       Oid                     pubid = GetSysCacheOid1(PUBLICATIONNAME,
+                                                                               CStringGetDatum(pubname));
+
+       if (!OidIsValid(pubid))
+       {
+               if (!missing_ok)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                        errmsg("publication \"%s\" does not exist", pubname)));
+       }
+
+       return pubid;
+}
+
+/*
+ * get_publication_name - map a publication OID to its name
+ *
+ * Returns a palloc'd copy of the name.  Raises an error if no publication
+ * has the given OID.
+ */
+char *
+get_publication_name(Oid pubid)
+{
+       HeapTuple               pubtup;
+       Form_pg_publication     form;
+       char               *result;
+
+       pubtup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+       if (!HeapTupleIsValid(pubtup))
+               elog(ERROR, "cache lookup failed for publication %u", pubid);
+
+       /* Copy the name out before the cache entry is released. */
+       form = (Form_pg_publication) GETSTRUCT(pubtup);
+       result = pstrdup(NameStr(form->pubname));
+
+       ReleaseSysCache(pubtup);
+
+       return result;
+}
+
+/*
+ * Returns Oids of tables in a publication.
+ *
+ * Set-returning function taking the publication name as its only argument.
+ * The first call looks up the publication (raising an error if it does not
+ * exist) and builds the list of relation OIDs; each call then returns the
+ * next OID until the list is exhausted.
+ */
+Datum
+pg_get_publication_tables(PG_FUNCTION_ARGS)
+{
+       FuncCallContext *funcctx;
+       ListCell          **lcp;
+
+       /* stuff done only on the first call of the function */
+       if (SRF_IS_FIRSTCALL())
+       {
+               MemoryContext oldcontext;
+               char               *pubname;
+               Publication        *publication;
+               List               *tables;
+
+               /*
+                * The name is only needed for the lookup below, so convert the
+                * argument here once instead of on every call.  This is done
+                * before switching contexts so the copy does not live in the
+                * multi-call context.
+                */
+               pubname = text_to_cstring(PG_GETARG_TEXT_PP(0));
+
+               /* create a function context for cross-call persistence */
+               funcctx = SRF_FIRSTCALL_INIT();
+
+               /* switch to memory context appropriate for multiple function calls */
+               oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+               publication = GetPublicationByName(pubname, false);
+               if (publication->alltables)
+                       tables = GetAllTablesPublicationRelations();
+               else
+                       tables = GetPublicationRelations(publication->oid);
+
+               /* Remember our position in the list across calls. */
+               lcp = (ListCell **) palloc(sizeof(ListCell *));
+               *lcp = list_head(tables);
+               funcctx->user_fctx = (void *) lcp;
+
+               MemoryContextSwitchTo(oldcontext);
+       }
+
+       /* stuff done on every call of the function */
+       funcctx = SRF_PERCALL_SETUP();
+       lcp = (ListCell **) funcctx->user_fctx;
+
+       /* SRF_RETURN_NEXT returns from the function, so this is not a loop. */
+       if (*lcp != NULL)
+       {
+               Oid             relid = lfirst_oid(*lcp);
+
+               *lcp = lnext(*lcp);
+               SRF_RETURN_NEXT(funcctx, ObjectIdGetDatum(relid));
+       }
+
+       SRF_RETURN_DONE(funcctx);
+}
index fb39a01841a06497a900c1846c1d710354211d66..60ed957655e8e86d9c7ba9d69e8d9faf056ddbf1 100644 (file)
@@ -39,6 +39,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_shdepend.h"
+#include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "catalog/pg_ts_config.h"
 #include "catalog/pg_ts_dict.h"
@@ -53,7 +54,9 @@
 #include "commands/extension.h"
 #include "commands/policy.h"
 #include "commands/proclang.h"
+#include "commands/publicationcmds.h"
 #include "commands/schemacmds.h"
+#include "commands/subscriptioncmds.h"
 #include "commands/tablecmds.h"
 #include "commands/typecmds.h"
 #include "storage/lmgr.h"
@@ -1406,6 +1409,14 @@ shdepReassignOwned(List *roleids, Oid newrole)
                                        AlterEventTriggerOwner_oid(sdepForm->objid, newrole);
                                        break;
 
+                               case PublicationRelationId:
+                                       AlterPublicationOwner_oid(sdepForm->objid, newrole);
+                                       break;
+
+                               case SubscriptionRelationId:
+                                       AlterSubscriptionOwner_oid(sdepForm->objid, newrole);
+                                       break;
+
                                        /* Generic alter owner cases */
                                case CollationRelationId:
                                case ConversionRelationId:
diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c
new file mode 100644 (file)
index 0000000..c358ef6
--- /dev/null
@@ -0,0 +1,207 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_subscription.c
+ *             replication subscriptions
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             src/backend/catalog/pg_subscription.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+
+#include "catalog/pg_type.h"
+#include "catalog/pg_subscription.h"
+
+#include "nodes/makefuncs.h"
+
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/syscache.h"
+
+
+static List *textarray_to_stringlist(ArrayType *textarray);
+
+/*
+ * Fetch the subscription from the syscache.
+ *
+ * Builds and returns a palloc'd Subscription describing the pg_subscription
+ * row identified by subid.  The strings and the publication list stored in
+ * the result are separate allocations, so the result stays usable after the
+ * cache entry has been released; free it with FreeSubscription().
+ *
+ * If no such row exists, NULL is returned when missing_ok is true and an
+ * error is raised otherwise.
+ */
+Subscription *
+GetSubscription(Oid subid, bool missing_ok)
+{
+       HeapTuple               tup;
+       Subscription   *sub;
+       Form_pg_subscription    subform;
+       Datum                   datum;
+       bool                    isnull;
+
+       tup = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid));
+
+       if (!HeapTupleIsValid(tup))
+       {
+               if (missing_ok)
+                       return NULL;
+
+               elog(ERROR, "cache lookup failed for subscription %u", subid);
+       }
+
+       subform = (Form_pg_subscription) GETSTRUCT(tup);
+
+       /* Fixed-width columns can be read straight from the tuple struct. */
+       sub = (Subscription *) palloc(sizeof(Subscription));
+       sub->oid = subid;
+       sub->dbid = subform->subdbid;
+       sub->name = pstrdup(NameStr(subform->subname));
+       sub->owner = subform->subowner;
+       sub->enabled = subform->subenabled;
+
+       /*
+        * Get conninfo.  TextDatumGetCString() already hands back a freshly
+        * palloc'd C string, so it is stored directly; copying it again would
+        * only leak the intermediate string.
+        */
+       datum = SysCacheGetAttr(SUBSCRIPTIONOID,
+                                                       tup,
+                                                       Anum_pg_subscription_subconninfo,
+                                                       &isnull);
+       Assert(!isnull);
+       sub->conninfo = TextDatumGetCString(datum);
+
+       /* Get slotname; copy it so the result does not depend on the cache entry */
+       datum = SysCacheGetAttr(SUBSCRIPTIONOID,
+                                                       tup,
+                                                       Anum_pg_subscription_subslotname,
+                                                       &isnull);
+       Assert(!isnull);
+       sub->slotname = pstrdup(NameStr(*DatumGetName(datum)));
+
+       /* Get publications, converted from a text array to a list of strings */
+       datum = SysCacheGetAttr(SUBSCRIPTIONOID,
+                                                       tup,
+                                                       Anum_pg_subscription_subpublications,
+                                                       &isnull);
+       Assert(!isnull);
+       sub->publications = textarray_to_stringlist(DatumGetArrayTypeP(datum));
+
+       ReleaseSysCache(tup);
+
+       return sub;
+}
+
+/*
+ * Count the pg_subscription rows whose subdbid equals the given database.
+ *
+ * dropdb() calls this to find out whether the database can indeed be
+ * dropped.
+ */
+int
+CountDBSubscriptions(Oid dbid)
+{
+       Relation                subrel;
+       SysScanDesc             sscan;
+       ScanKeyData             key[1];
+       int                             count;
+
+       subrel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+       /* Only rows that belong to the requested database are of interest. */
+       ScanKeyInit(&key[0],
+                               Anum_pg_subscription_subdbid,
+                               BTEqualStrategyNumber, F_OIDEQ,
+                               ObjectIdGetDatum(dbid));
+
+       /* The scan is started without an index (InvalidOid, indexOK = false). */
+       sscan = systable_beginscan(subrel, InvalidOid, false,
+                                                          NULL, 1, key);
+
+       /* The tuples themselves are not needed, only how many there are. */
+       count = 0;
+       while (HeapTupleIsValid(systable_getnext(sscan)))
+               count++;
+
+       systable_endscan(sscan);
+
+       /* Close the relation but leave its lock in place. */
+       heap_close(subrel, NoLock);
+
+       return count;
+}
+
+/*
+ * Release a Subscription struct together with the strings and the
+ * publication list attached to it.
+ */
+void
+FreeSubscription(Subscription *sub)
+{
+       /* Release everything the struct points to ... */
+       list_free_deep(sub->publications);
+       pfree(sub->slotname);
+       pfree(sub->conninfo);
+       pfree(sub->name);
+
+       /* ... and finally the struct itself. */
+       pfree(sub);
+}
+
+/*
+ * get_subscription_oid - map a subscription name to its OID
+ *
+ * The name is looked up among the subscriptions of the current database
+ * (MyDatabaseId).  When nothing matches, InvalidOid is returned if
+ * missing_ok is true; otherwise an "undefined object" error is raised.
+ */
+Oid
+get_subscription_oid(const char *subname, bool missing_ok)
+{
+       Oid                     subid = GetSysCacheOid2(SUBSCRIPTIONNAME, MyDatabaseId,
+                                                                               CStringGetDatum(subname));
+
+       /* Found it, or the caller tolerates a miss: hand back what we have. */
+       if (OidIsValid(subid) || missing_ok)
+               return subid;
+
+       ereport(ERROR,
+                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                        errmsg("subscription \"%s\" does not exist", subname)));
+
+       return InvalidOid;                      /* keep compiler quiet */
+}
+
+/*
+ * get_subscription_name - map a subscription OID to its name
+ *
+ * Returns a palloc'd copy of the name.  Raises an error if no subscription
+ * with that OID is found in the syscache.
+ */
+char *
+get_subscription_name(Oid subid)
+{
+       HeapTuple               cachetup;
+       char               *result;
+
+       cachetup = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid));
+       if (!HeapTupleIsValid(cachetup))
+               elog(ERROR, "cache lookup failed for subscription %u", subid);
+
+       /* Copy the name out before the cache entry is released. */
+       result = pstrdup(NameStr(((Form_pg_subscription) GETSTRUCT(cachetup))->subname));
+
+       ReleaseSysCache(cachetup);
+
+       return result;
+}
+
+/*
+ * Convert text array to list of strings.
+ *
+ * Returns a List of String nodes, one per array element and in array order;
+ * an array without elements yields NIL.
+ *
+ * Note: the resulting list of strings is pallocated here.
+ */
+static List *
+textarray_to_stringlist(ArrayType *textarray)
+{
+       Datum              *elems;
+       int                             nelems, i;
+       List               *res = NIL;
+
+       deconstruct_array(textarray,
+                                         TEXTOID, -1, false, 'i',
+                                         &elems, NULL, &nelems);
+
+       /*
+        * TextDatumGetCString() already returns a freshly palloc'd string, so it
+        * is wrapped as is; an additional pstrdup() would leak one copy per
+        * element.
+        */
+       for (i = 0; i < nelems; i++)
+               res = lappend(res, makeString(TextDatumGetCString(elems[i])));
+
+       return res;
+}
index 07f291b7cdb4f43af2966e54663e05f3a3d7f4cb..4dfedf89b611dff4a6a58600100fbdf38f48174e 100644 (file)
@@ -248,6 +248,15 @@ CREATE VIEW pg_stats WITH (security_barrier) AS
 
 REVOKE ALL on pg_statistic FROM public;
 
+CREATE VIEW pg_publication_tables AS
+    SELECT
+        P.pubname AS pubname,
+        N.nspname AS schemaname,
+        C.relname AS tablename
+    FROM pg_publication P, pg_class C
+         JOIN pg_namespace N ON (N.oid = C.relnamespace)
+    WHERE C.oid IN (SELECT relid FROM pg_get_publication_tables(P.pubname));
+
 CREATE VIEW pg_locks AS
     SELECT * FROM pg_lock_status() AS L;
 
@@ -708,6 +717,20 @@ CREATE VIEW pg_stat_wal_receiver AS
     FROM pg_stat_get_wal_receiver() s
     WHERE s.pid IS NOT NULL;
 
+CREATE VIEW pg_stat_subscription AS
+    SELECT
+            su.oid AS subid,
+            su.subname,
+            st.pid,
+            st.received_lsn,
+            st.last_msg_send_time,
+            st.last_msg_receipt_time,
+            st.latest_end_lsn,
+            st.latest_end_time
+    FROM pg_subscription su
+            LEFT JOIN pg_stat_get_subscription(NULL) st
+                      ON (st.subid = su.oid);
+
 CREATE VIEW pg_stat_ssl AS
     SELECT
             S.pid,
@@ -866,6 +889,8 @@ CREATE VIEW pg_replication_origin_status AS
 
 REVOKE ALL ON pg_replication_origin_status FROM public;
 
+REVOKE ALL ON pg_subscription FROM public;
+
 --
 -- We have a few function definitions in here, too.
 -- At some point there might be enough to justify breaking them out into
index 6b3742c0a0847196b7696ee5cdb9698cd611ed86..e0fab38cbe19af855403be312d5495165d9f0533 100644 (file)
@@ -17,9 +17,9 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \
        dbcommands.o define.o discard.o dropcmds.o \
        event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \
        indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \
-       policy.o portalcmds.o prepare.o proclang.o \
-       schemacmds.o seclabel.o sequence.o tablecmds.o tablespace.o trigger.o \
-       tsearchcmds.o typecmds.o user.o vacuum.o vacuumlazy.o \
-       variable.o view.o
+       policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \
+       schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \
+       tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \
+       vacuumlazy.o variable.o view.o
 
 include $(top_srcdir)/src/backend/common.mk
index 8b6f42090969bdec199b5a161bd728ae831232ef..768fcc82ddce8949ca7728f390d19f16becdd9a1 100644 (file)
@@ -45,7 +45,9 @@
 #include "commands/extension.h"
 #include "commands/policy.h"
 #include "commands/proclang.h"
+#include "commands/publicationcmds.h"
 #include "commands/schemacmds.h"
+#include "commands/subscriptioncmds.h"
 #include "commands/tablecmds.h"
 #include "commands/tablespace.h"
 #include "commands/trigger.h"
@@ -770,6 +772,14 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt)
                        return AlterEventTriggerOwner(strVal(linitial(stmt->object)),
                                                                                  newowner);
 
+               case OBJECT_PUBLICATION:
+                       return AlterPublicationOwner(strVal(linitial(stmt->object)),
+                                                                                newowner);
+
+               case OBJECT_SUBSCRIPTION:
+                       return AlterSubscriptionOwner(strVal(linitial(stmt->object)),
+                                                                                 newowner);
+
                        /* Generic cases */
                case OBJECT_AGGREGATE:
                case OBJECT_COLLATION:
index 2833f3e846965d1c2c80df83304ab9e54ea72a01..6ad8fd77b109bb0c7450945dd4c442b8bccc7543 100644 (file)
@@ -37,6 +37,7 @@
 #include "catalog/pg_authid.h"
 #include "catalog/pg_database.h"
 #include "catalog/pg_db_role_setting.h"
+#include "catalog/pg_subscription.h"
 #include "catalog/pg_tablespace.h"
 #include "commands/comment.h"
 #include "commands/dbcommands.h"
@@ -790,6 +791,7 @@ dropdb(const char *dbname, bool missing_ok)
        int                     npreparedxacts;
        int                     nslots,
                                nslots_active;
+       int                     nsubscriptions;
 
        /*
         * Look up the target database's OID, and get exclusive lock on it. We
@@ -874,6 +876,21 @@ dropdb(const char *dbname, bool missing_ok)
                                                dbname),
                                 errdetail_busy_db(notherbackends, npreparedxacts)));
 
+       /*
+        * Check if there are subscriptions defined in the target database.
+        *
+        * We can't drop them automatically because they might be holding
+        * resources in other databases/instances.
+        */
+       if ((nsubscriptions = CountDBSubscriptions(db_id)) > 0)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_IN_USE),
+                                errmsg("database \"%s\" is being used by logical replication subscription",
+                                               dbname),
+                                errdetail_plural("There is %d subscription.",
+                                                                 "There are %d subscriptions.",
+                                                                 nsubscriptions, nsubscriptions)));
+
        /*
         * Remove the database's tuple from pg_database.
         */
index 714b5252c738d0cc682a1ca1324e4b40cefeb488..8da924517b9a69fd43ec06297e6d1829e1ea04c3 100644 (file)
@@ -319,3 +319,31 @@ defGetTypeLength(DefElem *def)
                                        def->defname, defGetString(def))));
        return 0;                                       /* keep compiler quiet */
 }
+
+/*
+ * Return the argument of a DefElem as a list of String nodes.
+ *
+ * The strings are not interpreted in any way.  A missing argument is
+ * reported as a syntax error; an argument that is not a List, or a list
+ * member that is not a String node, raises an internal error.  The list that
+ * is returned is the DefElem's own argument, not a copy.
+ */
+List *
+defGetStringList(DefElem *def)
+{
+       List       *names;
+       ListCell   *lc;
+
+       if (def->arg == NULL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                errmsg("%s requires a parameter",
+                                               def->defname)));
+
+       if (!IsA(def->arg, List))
+               elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg));
+
+       names = (List *) def->arg;
+
+       /* Every member has to be a String node. */
+       foreach(lc, names)
+       {
+               Node       *item = (Node *) lfirst(lc);
+
+               if (nodeTag(item) != T_String)
+                       elog(ERROR, "unexpected node type in name list: %d",
+                                (int) nodeTag(item));
+       }
+
+       return names;
+}
index 96436c06897dc147172882c649cf6dad63509c53..8cfbcf43f799cfd9ca6be362f4288b764bc7185a 100644 (file)
@@ -441,6 +441,10 @@ does_not_exist_skipping(ObjectType objtype, List *objname, List *objargs)
                                }
                        }
                        break;
+               case OBJECT_PUBLICATION:
+                       msg = gettext_noop("publication \"%s\" does not exist, skipping");
+                       name = NameListToString(objname);
+                       break;
                default:
                        elog(ERROR, "unrecognized object type: %d", (int) objtype);
                        break;
index c0061e195eb34f668add57293c952d835eb1f01e..812553736151211bd2db56d6964d237bb7238e01 100644 (file)
@@ -106,11 +106,13 @@ static event_trigger_support_data event_trigger_support[] = {
        {"OPERATOR CLASS", true},
        {"OPERATOR FAMILY", true},
        {"POLICY", true},
+       {"PUBLICATION", true},
        {"ROLE", false},
        {"RULE", true},
        {"SCHEMA", true},
        {"SEQUENCE", true},
        {"SERVER", true},
+       {"SUBSCRIPTION", true},
        {"TABLE", true},
        {"TABLESPACE", false},
        {"TRANSFORM", true},
@@ -1103,9 +1105,12 @@ EventTriggerSupportsObjectType(ObjectType obtype)
                case OBJECT_OPERATOR:
                case OBJECT_OPFAMILY:
                case OBJECT_POLICY:
+               case OBJECT_PUBLICATION:
+               case OBJECT_PUBLICATION_REL:
                case OBJECT_RULE:
                case OBJECT_SCHEMA:
                case OBJECT_SEQUENCE:
+               case OBJECT_SUBSCRIPTION:
                case OBJECT_TABCONSTRAINT:
                case OBJECT_TABLE:
                case OBJECT_TRANSFORM:
@@ -1168,6 +1173,9 @@ EventTriggerSupportsObjectClass(ObjectClass objclass)
                case OCLASS_EXTENSION:
                case OCLASS_POLICY:
                case OCLASS_AM:
+               case OCLASS_PUBLICATION:
+               case OCLASS_PUBLICATION_REL:
+               case OCLASS_SUBSCRIPTION:
                        return true;
        }
 
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
new file mode 100644 (file)
index 0000000..21e523d
--- /dev/null
@@ -0,0 +1,754 @@
+/*-------------------------------------------------------------------------
+ *
+ * publicationcmds.c
+ *             publication manipulation
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             src/backend/commands/publicationcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/xact.h"
+
+#include "catalog/catalog.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/objectaddress.h"
+#include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
+
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/publicationcmds.h"
+
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+/* Same as MAXNUMMESSAGES in sinvaladt.c */
+#define MAX_RELCACHE_INVAL_MSGS 4096
+
+static List *OpenTableList(List *tables);
+static void CloseTableList(List *rels);
+static void PublicationAddTables(Oid pubid, List *rels, bool if_not_exists,
+                                        AlterPublicationStmt *stmt);
+static void PublicationDropTables(Oid pubid, List *rels, bool missing_ok);
+
+/*
+ * Record a single "publish X" / "nopublish X" option.
+ *
+ * Raises a syntax error if the same action was already specified (*given is
+ * true).  Otherwise marks the action as given and stores the option's
+ * boolean value in *value, inverted when negate is true (the "nopublish"
+ * spelling).
+ */
+static void
+set_publish_action(DefElem *defel, bool negate, bool *given, bool *value)
+{
+       bool            setting;
+
+       if (*given)
+               ereport(ERROR,
+                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                errmsg("conflicting or redundant options")));
+
+       *given = true;
+
+       setting = defGetBoolean(defel);
+       *value = negate ? !setting : setting;
+}
+
+/*
+ * Parse the option list of a publication command.
+ *
+ * For each of insert, update and delete, *publish_X_given reports whether
+ * the list mentioned the action (as "publish X" or "nopublish X") and
+ * *publish_X receives the resulting setting.  Actions that are not mentioned
+ * are reported as not given and default to true.
+ *
+ * Mentioning the same action twice raises a syntax error; any other option
+ * name raises an internal error.
+ */
+static void
+parse_publication_options(List *options,
+                                                 bool *publish_insert_given,
+                                                 bool *publish_insert,
+                                                 bool *publish_update_given,
+                                                 bool *publish_update,
+                                                 bool *publish_delete_given,
+                                                 bool *publish_delete)
+{
+       ListCell   *lc;
+
+       *publish_insert_given = false;
+       *publish_update_given = false;
+       *publish_delete_given = false;
+
+       /* Defaults are true */
+       *publish_insert = true;
+       *publish_update = true;
+       *publish_delete = true;
+
+       /* Parse options */
+       foreach (lc, options)
+       {
+               DefElem    *defel = (DefElem *) lfirst(lc);
+
+               if (strcmp(defel->defname, "publish insert") == 0)
+                       set_publish_action(defel, false,
+                                                          publish_insert_given, publish_insert);
+               else if (strcmp(defel->defname, "nopublish insert") == 0)
+                       set_publish_action(defel, true,
+                                                          publish_insert_given, publish_insert);
+               else if (strcmp(defel->defname, "publish update") == 0)
+                       set_publish_action(defel, false,
+                                                          publish_update_given, publish_update);
+               else if (strcmp(defel->defname, "nopublish update") == 0)
+                       set_publish_action(defel, true,
+                                                          publish_update_given, publish_update);
+               else if (strcmp(defel->defname, "publish delete") == 0)
+                       set_publish_action(defel, false,
+                                                          publish_delete_given, publish_delete);
+               else if (strcmp(defel->defname, "nopublish delete") == 0)
+                       set_publish_action(defel, true,
+                                                          publish_delete_given, publish_delete);
+               else
+                       elog(ERROR, "unrecognized option: %s", defel->defname);
+       }
+}
+
+/*
+ * Create new publication.
+ *
+ * Checks that the current user has CREATE privilege on the current database
+ * (and is a superuser when FOR ALL TABLES was requested), rejects a name
+ * that is already in use, inserts the new pg_publication row owned by the
+ * current user, and then adds any tables listed in the statement to the
+ * publication.
+ *
+ * Returns the ObjectAddress of the new publication.
+ */
+ObjectAddress
+CreatePublication(CreatePublicationStmt *stmt)
+{
+       Relation        rel;
+       ObjectAddress myself;
+       Oid                     puboid;
+       bool            nulls[Natts_pg_publication];
+       Datum           values[Natts_pg_publication];
+       HeapTuple       tup;
+       bool            publish_insert_given;
+       bool            publish_update_given;
+       bool            publish_delete_given;
+       bool            publish_insert;
+       bool            publish_update;
+       bool            publish_delete;
+       AclResult       aclresult;
+
+       /* must have CREATE privilege on database */
+       aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE);
+       if (aclresult != ACLCHECK_OK)
+               aclcheck_error(aclresult, ACL_KIND_DATABASE,
+                                          get_database_name(MyDatabaseId));
+
+       /* FOR ALL TABLES requires superuser */
+       if (stmt->for_all_tables && !superuser())
+               ereport(ERROR,
+                               (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                                (errmsg("must be superuser to create FOR ALL TABLES publication"))));
+
+       rel = heap_open(PublicationRelationId, RowExclusiveLock);
+
+       /* Check if name is used */
+       puboid = GetSysCacheOid1(PUBLICATIONNAME, CStringGetDatum(stmt->pubname));
+       if (OidIsValid(puboid))
+       {
+               ereport(ERROR,
+                               (errcode(ERRCODE_DUPLICATE_OBJECT),
+                                errmsg("publication \"%s\" already exists",
+                                               stmt->pubname)));
+       }
+
+       /*
+        * Form a tuple.  The publish_insert/update/delete settings come from
+        * the statement's option list; the *_given flags are not consulted
+        * here, so actions that were not mentioned keep the parser's defaults.
+        */
+       memset(values, 0, sizeof(values));
+       memset(nulls, false, sizeof(nulls));
+
+       values[Anum_pg_publication_pubname - 1] =
+               DirectFunctionCall1(namein, CStringGetDatum(stmt->pubname));
+       values[Anum_pg_publication_pubowner - 1] = ObjectIdGetDatum(GetUserId());
+
+       parse_publication_options(stmt->options,
+                                                         &publish_insert_given, &publish_insert,
+                                                         &publish_update_given, &publish_update,
+                                                         &publish_delete_given, &publish_delete);
+
+       values[Anum_pg_publication_puballtables - 1] =
+               BoolGetDatum(stmt->for_all_tables);
+       values[Anum_pg_publication_pubinsert - 1] =
+               BoolGetDatum(publish_insert);
+       values[Anum_pg_publication_pubupdate - 1] =
+               BoolGetDatum(publish_update);
+       values[Anum_pg_publication_pubdelete - 1] =
+               BoolGetDatum(publish_delete);
+
+       tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+
+       /* Insert tuple into catalog; the insert returns the new publication's OID. */
+       puboid = simple_heap_insert(rel, tup);
+       CatalogUpdateIndexes(rel, tup);
+       heap_freetuple(tup);
+
+       ObjectAddressSet(myself, PublicationRelationId, puboid);
+
+       /* Make the changes visible. */
+       CommandCounterIncrement();
+
+       if (stmt->tables)
+       {
+               List       *rels;
+
+               Assert(list_length(stmt->tables) > 0);
+
+               rels = OpenTableList(stmt->tables);
+               PublicationAddTables(puboid, rels, true, NULL);
+               CloseTableList(rels);
+       }
+
+       heap_close(rel, RowExclusiveLock);
+
+       InvokeObjectPostCreateHook(PublicationRelationId, puboid, 0);
+
+       return myself;
+}
+
+/*
+ * Change options of a publication.
+ *
+ * Parses stmt->options and updates only those of the pubinsert, pubupdate
+ * and pubdelete columns that were explicitly given; the remaining columns
+ * keep their current values.  Afterwards the relcache is invalidated for the
+ * affected relations, the command is collected for event triggers and the
+ * post-alter hook is invoked.
+ *
+ * rel is the opened pg_publication relation and tup the publication's
+ * current tuple, both supplied by AlterPublication().
+ */
+static void
+AlterPublicationOptions(AlterPublicationStmt *stmt, Relation rel,
+                                          HeapTuple tup)
+{
+       bool            nulls[Natts_pg_publication];
+       bool            replaces[Natts_pg_publication];
+       Datum           values[Natts_pg_publication];
+       bool            publish_insert_given;
+       bool            publish_update_given;
+       bool            publish_delete_given;
+       bool            publish_insert;
+       bool            publish_update;
+       bool            publish_delete;
+       ObjectAddress           obj;
+
+       parse_publication_options(stmt->options,
+                                                         &publish_insert_given, &publish_insert,
+                                                         &publish_update_given, &publish_update,
+                                                         &publish_delete_given, &publish_delete);
+
+       /*
+        * Everything ok, form a new tuple.  Only columns flagged in replaces[]
+        * are taken from values[].
+        */
+       memset(values, 0, sizeof(values));
+       memset(nulls, false, sizeof(nulls));
+       memset(replaces, false, sizeof(replaces));
+
+       if (publish_insert_given)
+       {
+               values[Anum_pg_publication_pubinsert - 1] =
+                       BoolGetDatum(publish_insert);
+               replaces[Anum_pg_publication_pubinsert - 1] = true;
+       }
+       if (publish_update_given)
+       {
+               values[Anum_pg_publication_pubupdate - 1] =
+                       BoolGetDatum(publish_update);
+               replaces[Anum_pg_publication_pubupdate - 1] = true;
+       }
+       if (publish_delete_given)
+       {
+               values[Anum_pg_publication_pubdelete - 1] =
+                       BoolGetDatum(publish_delete);
+               replaces[Anum_pg_publication_pubdelete - 1] = true;
+       }
+
+       tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
+                                                       replaces);
+
+       /* Update the catalog. */
+       simple_heap_update(rel, &tup->t_self, tup);
+       CatalogUpdateIndexes(rel, tup);
+
+       CommandCounterIncrement();
+
+       /*
+        * Invalidate the relcache.  A FOR ALL TABLES publication resets the
+        * whole relcache; otherwise the relations that are members of the
+        * publication are invalidated, individually unless there are too many
+        * of them.
+        */
+       if (((Form_pg_publication) GETSTRUCT(tup))->puballtables)
+       {
+               CacheInvalidateRelcacheAll();
+       }
+       else
+       {
+               List    *relids = GetPublicationRelations(HeapTupleGetOid(tup));
+
+               /*
+                * We don't want to send too many individual messages, at some point
+                * it's cheaper to just reset whole relcache.
+                */
+               if (list_length(relids) < MAX_RELCACHE_INVAL_MSGS)
+               {
+                       ListCell *lc;
+
+                       foreach (lc, relids)
+                       {
+                               Oid     relid = lfirst_oid(lc);
+
+                               CacheInvalidateRelcacheByRelid(relid);
+                       }
+               }
+               else
+                       CacheInvalidateRelcacheAll();
+       }
+
+       ObjectAddressSet(obj, PublicationRelationId, HeapTupleGetOid(tup));
+       EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress,
+                                                                        (Node *) stmt);
+
+       InvokeObjectPostAlterHook(PublicationRelationId, HeapTupleGetOid(tup), 0);
+}
+
+/*
+ * Add or remove table to/from publication.
+ *
+ * Depending on stmt->tableAction, the tables listed in stmt->tables are
+ * added to the publication (DEFELEM_ADD), dropped from it (DEFELEM_DROP), or
+ * become its new table list (any other action, i.e. DEFELEM_SET): current
+ * members that are not named in the statement are dropped and the named
+ * tables are added, skipping those that are already members.
+ *
+ * Raises an error if the publication is defined as FOR ALL TABLES.
+ *
+ * tup is the publication's pg_publication tuple; rel is not referenced in
+ * this function.
+ */
+static void
+AlterPublicationTables(AlterPublicationStmt *stmt, Relation rel,
+                                          HeapTuple tup)
+{
+       Oid                     pubid = HeapTupleGetOid(tup);
+       List       *rels = NIL;
+       Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+       /* Check that user is allowed to manipulate the publication tables. */
+       if (pubform->puballtables)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("publication \"%s\" is defined as FOR ALL TABLES",
+                                               NameStr(pubform->pubname)),
+                                errdetail("Tables cannot be added to or dropped from FOR ALL TABLES publications.")));
+
+       Assert(list_length(stmt->tables) > 0);
+
+       rels = OpenTableList(stmt->tables);
+
+       if (stmt->tableAction == DEFELEM_ADD)
+               PublicationAddTables(pubid, rels, false, stmt);
+       else if (stmt->tableAction == DEFELEM_DROP)
+               PublicationDropTables(pubid, rels, false);
+       else /* DEFELEM_SET */
+       {
+               List       *oldrelids = GetPublicationRelations(pubid);
+               List       *delrels = NIL;
+               ListCell   *oldlc;
+
+               /*
+                * Calculate which relations to drop: every current member whose OID
+                * does not appear among the relations named in the statement.
+                */
+               foreach(oldlc, oldrelids)
+               {
+                       Oid                     oldrelid = lfirst_oid(oldlc);
+                       ListCell   *newlc;
+                       bool            found = false;
+
+                       foreach(newlc, rels)
+                       {
+                               Relation        newrel = (Relation) lfirst(newlc);
+
+                               if (RelationGetRelid(newrel) == oldrelid)
+                               {
+                                       found = true;
+                                       break;
+                               }
+                       }
+
+                       if (!found)
+                       {
+                               Relation        oldrel = heap_open(oldrelid,
+                                                                                          ShareUpdateExclusiveLock);
+                               delrels = lappend(delrels, oldrel);
+                       }
+               }
+
+               /* And drop them. */
+               PublicationDropTables(pubid, delrels, true);
+
+               /*
+                * Don't bother calculating the difference for adding, we'll catch
+                * and skip existing ones when doing catalog update.
+                */
+               PublicationAddTables(pubid, rels, true, stmt);
+
+               CloseTableList(delrels);
+       }
+
+       CloseTableList(rels);
+}
+
+/*
+ * Alter an existing publication.
+ *
+ * Looks the publication up by name, verifies that the current user owns it,
+ * and then hands over to AlterPublicationOptions() when the statement
+ * carries options, or to AlterPublicationTables() when it does not.
+ */
+void
+AlterPublication(AlterPublicationStmt *stmt)
+{
+       Relation                pubrel;
+       HeapTuple               pubtup;
+
+       pubrel = heap_open(PublicationRelationId, RowExclusiveLock);
+
+       /* Fetch a copy of the publication's tuple; it is freed below. */
+       pubtup = SearchSysCacheCopy1(PUBLICATIONNAME,
+                                                                CStringGetDatum(stmt->pubname));
+       if (!HeapTupleIsValid(pubtup))
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("publication \"%s\" does not exist",
+                                               stmt->pubname)));
+
+       /* Only the owner may alter the publication. */
+       if (!pg_publication_ownercheck(HeapTupleGetOid(pubtup), GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION,
+                                          stmt->pubname);
+
+       /* Without options, the statement is about the table list. */
+       if (stmt->options == NIL)
+               AlterPublicationTables(stmt, pubrel, pubtup);
+       else
+               AlterPublicationOptions(stmt, pubrel, pubtup);
+
+       /* Release the tuple copy and close the catalog. */
+       heap_freetuple(pubtup);
+       heap_close(pubrel, RowExclusiveLock);
+}
+
+/*
+ * Delete the pg_publication row that has the given OID.
+ *
+ * Raises an internal error if no such row is found in the syscache.
+ */
+void
+RemovePublicationById(Oid pubid)
+{
+       Relation        pubcatalog = heap_open(PublicationRelationId,
+                                              RowExclusiveLock);
+       HeapTuple       pubtuple = SearchSysCache1(PUBLICATIONOID,
+                                                  ObjectIdGetDatum(pubid));
+
+       if (!HeapTupleIsValid(pubtuple))
+               elog(ERROR, "cache lookup failed for publication %u", pubid);
+
+       /* Remove the row, then drop our syscache reference to it. */
+       simple_heap_delete(pubcatalog, &pubtuple->t_self);
+       ReleaseSysCache(pubtuple);
+
+       heap_close(pubcatalog, RowExclusiveLock);
+}
+
+/*
+ * Delete one publication/table mapping row, identified by the mapping's
+ * own OID.
+ *
+ * Before the row is deleted, a relcache invalidation is issued for the
+ * table it refers to, so that the table's publication info is rebuilt.
+ */
+void
+RemovePublicationRelById(Oid proid)
+{
+       Relation        mapcatalog;
+       HeapTuple       maptuple;
+       Oid             tablerelid;
+
+       mapcatalog = heap_open(PublicationRelRelationId, RowExclusiveLock);
+
+       maptuple = SearchSysCache1(PUBLICATIONREL, ObjectIdGetDatum(proid));
+       if (!HeapTupleIsValid(maptuple))
+               elog(ERROR, "cache lookup failed for publication table %u",
+                        proid);
+
+       /* Invalidate the published table's relcache entry first. */
+       tablerelid = ((Form_pg_publication_rel) GETSTRUCT(maptuple))->prrelid;
+       CacheInvalidateRelcacheByRelid(tablerelid);
+
+       simple_heap_delete(mapcatalog, &maptuple->t_self);
+       ReleaseSysCache(maptuple);
+
+       heap_close(mapcatalog, RowExclusiveLock);
+}
+
+/*
+ * Open the relations named by the given list of RangeVars.
+ *
+ * Each RangeVar is opened with ShareUpdateExclusiveLock.  When a RangeVar
+ * has its inh flag set, every relation reported by find_all_inheritors for
+ * it is opened as well.  A relation that is reached more than once (named
+ * twice, or named explicitly and also found as an inheritor) appears only
+ * once in the result.
+ *
+ * Returns a list of open Relations; the returned tables are locked in
+ * ShareUpdateExclusiveLock mode.
+ */
+static List *
+OpenTableList(List *tables)
+{
+       List       *relids = NIL;
+       List       *rels = NIL;
+       ListCell   *lc;
+
+       /*
+        * Open, share-lock, and check all the explicitly-specified relations
+        */
+       foreach(lc, tables)
+       {
+               RangeVar   *rv = lfirst(lc);
+               Relation        rel;
+               bool            recurse = rv->inh;
+               Oid                     myrelid;
+
+               CHECK_FOR_INTERRUPTS();
+
+               rel = heap_openrv(rv, ShareUpdateExclusiveLock);
+               myrelid = RelationGetRelid(rel);
+
+               /*
+                * Filter out duplicates when the user specifies "foo, foo".  Note
+                * that this algorithm is known to be inefficient (O(N^2)), but
+                * given that it only works on the list of tables given to us by
+                * the user it's deemed acceptable.
+                */
+               if (list_member_oid(relids, myrelid))
+               {
+                       heap_close(rel, ShareUpdateExclusiveLock);
+                       continue;
+               }
+               rels = lappend(rels, rel);
+               relids = lappend_oid(relids, myrelid);
+
+               if (recurse)
+               {
+                       ListCell   *child;
+                       List       *children;
+
+                       children = find_all_inheritors(myrelid, ShareUpdateExclusiveLock,
+                                                      NULL);
+
+                       foreach(child, children)
+                       {
+                               Oid                     childrelid = lfirst_oid(child);
+
+                               /*
+                                * Skip anything already collected, e.g. when the user
+                                * specified both parent and child tables.  Nothing has
+                                * been opened for this entry yet, so there is nothing
+                                * to close here.
+                                */
+                               if (list_member_oid(relids, childrelid))
+                                       continue;
+
+                               /* find_all_inheritors already got lock */
+                               rel = heap_open(childrelid, NoLock);
+                               rels = lappend(rels, rel);
+                               relids = lappend_oid(relids, childrelid);
+                       }
+               }
+       }
+
+       list_free(relids);
+
+       return rels;
+}
+
+/*
+ * Close every relation in the list, passing NoLock to heap_close.
+ */
+static void
+CloseTableList(List *rels)
+{
+       ListCell   *cell;
+
+       foreach(cell, rels)
+               heap_close((Relation) lfirst(cell), NoLock);
+}
+
+/*
+ * Add each relation in rels to the publication pubid.
+ *
+ * The current user must pass the ownership check on every table.
+ * if_not_exists is handed through to publication_add_relation.  When stmt
+ * is non-NULL, each added mapping is also reported to the event trigger
+ * machinery and to the object post-create hook.
+ */
+static void
+PublicationAddTables(Oid pubid, List *rels, bool if_not_exists,
+                     AlterPublicationStmt *stmt)
+{
+       ListCell   *cell;
+
+       Assert(stmt == NULL || !stmt->for_all_tables);
+
+       foreach(cell, rels)
+       {
+               Relation        target = (Relation) lfirst(cell);
+               ObjectAddress   mapping;
+
+               /* Ownership check on the table being published. */
+               if (!pg_class_ownercheck(RelationGetRelid(target), GetUserId()))
+                       aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
+                                      RelationGetRelationName(target));
+
+               mapping = publication_add_relation(pubid, target, if_not_exists);
+
+               /* Without a statement there is nothing to report. */
+               if (stmt == NULL)
+                       continue;
+
+               EventTriggerCollectSimpleCommand(mapping, InvalidObjectAddress,
+                                                (Node *) stmt);
+               InvokeObjectPostCreateHook(PublicationRelRelationId,
+                                          mapping.objectId, 0);
+       }
+}
+
+/*
+ * Remove each relation in rels from the publication pubid.
+ *
+ * A relation that has no mapping row for this publication is skipped
+ * silently when missing_ok is true and raises an error otherwise.
+ */
+static void
+PublicationDropTables(Oid pubid, List *rels, bool missing_ok)
+{
+       ListCell   *cell;
+
+       foreach(cell, rels)
+       {
+               Relation        target = (Relation) lfirst(cell);
+               ObjectAddress   mapping;
+               Oid             mapid;
+
+               /* Find the mapping row for this (table, publication) pair. */
+               mapid = GetSysCacheOid2(PUBLICATIONRELMAP,
+                                       ObjectIdGetDatum(RelationGetRelid(target)),
+                                       ObjectIdGetDatum(pubid));
+
+               if (OidIsValid(mapid))
+               {
+                       ObjectAddressSet(mapping, PublicationRelRelationId, mapid);
+                       performDeletion(&mapping, DROP_CASCADE, 0);
+               }
+               else if (!missing_ok)
+               {
+                       ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("relation \"%s\" is not part of the publication",
+                                       RelationGetRelationName(target))));
+               }
+       }
+}
+
+/*
+ * Shared implementation of the publication owner change.
+ *
+ * rel is the open pg_publication catalog and tup a modifiable copy of the
+ * publication's row.  Nothing happens when the publication already belongs
+ * to newOwnerId.  Otherwise the current user must pass the publication
+ * owner check and the new owner must be a superuser; the row, the owner
+ * dependency and the post-alter hook are then updated in that order.
+ */
+static void
+AlterPublicationOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
+{
+       Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+       /* Already owned by the requested role: nothing to do. */
+       if (newOwnerId == pubform->pubowner)
+               return;
+
+       if (!pg_publication_ownercheck(HeapTupleGetOid(tup), GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION,
+                              NameStr(pubform->pubname));
+
+       /* Only a superuser may become the owner of a publication. */
+       if (!superuser_arg(newOwnerId))
+               ereport(ERROR,
+                       (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                        errmsg("permission denied to change owner of publication \"%s\"",
+                               NameStr(pubform->pubname)),
+                        errhint("The owner of a publication must be a superuser.")));
+
+       /* Store the new owner in the row and write it back. */
+       pubform->pubowner = newOwnerId;
+       simple_heap_update(rel, &tup->t_self, tup);
+       CatalogUpdateIndexes(rel, tup);
+
+       /* Keep the owner dependency in step with the catalog row. */
+       changeDependencyOnOwner(PublicationRelationId,
+                               HeapTupleGetOid(tup),
+                               newOwnerId);
+
+       InvokeObjectPostAlterHook(PublicationRelationId,
+                                 HeapTupleGetOid(tup), 0);
+}
+
+/*
+ * Change the owner of the publication with the given name.
+ *
+ * Errors out if no such publication exists.  Returns the object address
+ * of the publication.
+ */
+ObjectAddress
+AlterPublicationOwner(const char *name, Oid newOwnerId)
+{
+       Relation        pubcatalog;
+       HeapTuple       pubtuple;
+       Oid             pubid;
+       ObjectAddress   result;
+
+       pubcatalog = heap_open(PublicationRelationId, RowExclusiveLock);
+
+       pubtuple = SearchSysCacheCopy1(PUBLICATIONNAME, CStringGetDatum(name));
+       if (!HeapTupleIsValid(pubtuple))
+               ereport(ERROR,
+                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                        errmsg("publication \"%s\" does not exist", name)));
+
+       /* Remember the OID now; it identifies the object we return. */
+       pubid = HeapTupleGetOid(pubtuple);
+       ObjectAddressSet(result, PublicationRelationId, pubid);
+
+       AlterPublicationOwner_internal(pubcatalog, pubtuple, newOwnerId);
+
+       heap_freetuple(pubtuple);
+       heap_close(pubcatalog, RowExclusiveLock);
+
+       return result;
+}
+
+/*
+ * Change the owner of the publication with the given OID.
+ *
+ * Note that, despite its name, the subid argument is looked up in the
+ * publication syscache.  Errors out if no such publication exists.
+ */
+void
+AlterPublicationOwner_oid(Oid subid, Oid newOwnerId)
+{
+       Relation        pubcatalog = heap_open(PublicationRelationId,
+                                              RowExclusiveLock);
+       HeapTuple       pubtuple = SearchSysCacheCopy1(PUBLICATIONOID,
+                                                      ObjectIdGetDatum(subid));
+
+       if (!HeapTupleIsValid(pubtuple))
+               ereport(ERROR,
+                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                        errmsg("publication with OID %u does not exist", subid)));
+
+       AlterPublicationOwner_internal(pubcatalog, pubtuple, newOwnerId);
+
+       heap_freetuple(pubtuple);
+       heap_close(pubcatalog, RowExclusiveLock);
+}
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
new file mode 100644 (file)
index 0000000..1448ee3
--- /dev/null
@@ -0,0 +1,643 @@
+/*-------------------------------------------------------------------------
+ *
+ * subscriptioncmds.c
+ *             subscription catalog manipulation functions
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             subscriptioncmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/objectaddress.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_subscription.h"
+
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/subscriptioncmds.h"
+
+#include "replication/logicallauncher.h"
+#include "replication/origin.h"
+#include "replication/walreceiver.h"
+#include "replication/worker_internal.h"
+
+#include "storage/lmgr.h"
+
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+
+/*
+ * Shared option parser for the CREATE and ALTER SUBSCRIPTION commands.
+ *
+ * Each output pointer other than enabled_given may be NULL.  An option
+ * whose output pointer is NULL is not accepted and falls through to the
+ * "unrecognized option" error, which is how each command limits itself to
+ * the options it supports.  enabled_given is written whenever enabled is
+ * non-NULL, so the two must be passed together.
+ *
+ * Values stored before any option is looked at: *conninfo = NULL,
+ * *publications = NIL, *enabled = true (with *enabled_given = false),
+ * *create_slot = true and *slot_name = NULL.
+ *
+ * Giving the same setting twice, including through its negated spelling
+ * ("enabled"/"disabled", "create slot"/"nocreate slot"), is reported as
+ * "conflicting or redundant options".
+ */
+static void
+parse_subscription_options(List *options, char **conninfo,
+                           List **publications, bool *enabled_given,
+                           bool *enabled, bool *create_slot, char **slot_name)
+{
+       ListCell   *cell;
+       bool        slot_creation_seen = false;
+
+       /* Store the defaults for every output the caller supplied. */
+       if (conninfo != NULL)
+               *conninfo = NULL;
+       if (publications != NULL)
+               *publications = NIL;
+       if (enabled != NULL)
+       {
+               *enabled_given = false;
+               *enabled = true;
+       }
+       if (create_slot != NULL)
+               *create_slot = true;
+       if (slot_name != NULL)
+               *slot_name = NULL;
+
+       /* Walk the option list. */
+       foreach (cell, options)
+       {
+               DefElem    *opt = (DefElem *) lfirst(cell);
+               const char *optname = opt->defname;
+
+               if (conninfo != NULL && strcmp(optname, "conninfo") == 0)
+               {
+                       if (*conninfo != NULL)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("conflicting or redundant options")));
+
+                       *conninfo = defGetString(opt);
+               }
+               else if (publications != NULL && strcmp(optname, "publication") == 0)
+               {
+                       if (*publications != NIL)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("conflicting or redundant options")));
+
+                       *publications = defGetStringList(opt);
+               }
+               else if (enabled != NULL && strcmp(optname, "enabled") == 0)
+               {
+                       if (*enabled_given)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("conflicting or redundant options")));
+
+                       *enabled_given = true;
+                       *enabled = defGetBoolean(opt);
+               }
+               else if (enabled != NULL && strcmp(optname, "disabled") == 0)
+               {
+                       if (*enabled_given)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("conflicting or redundant options")));
+
+                       /* "disabled" is stored as the inverse of "enabled". */
+                       *enabled_given = true;
+                       *enabled = !defGetBoolean(opt);
+               }
+               else if (create_slot != NULL && strcmp(optname, "create slot") == 0)
+               {
+                       if (slot_creation_seen)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("conflicting or redundant options")));
+
+                       slot_creation_seen = true;
+                       *create_slot = defGetBoolean(opt);
+               }
+               else if (create_slot != NULL && strcmp(optname, "nocreate slot") == 0)
+               {
+                       if (slot_creation_seen)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("conflicting or redundant options")));
+
+                       /* "nocreate slot" is stored as the inverse of "create slot". */
+                       slot_creation_seen = true;
+                       *create_slot = !defGetBoolean(opt);
+               }
+               else if (slot_name != NULL && strcmp(optname, "slot name") == 0)
+               {
+                       if (*slot_name != NULL)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("conflicting or redundant options")));
+
+                       *slot_name = defGetString(opt);
+               }
+               else
+                       elog(ERROR, "unrecognized option: %s", optname);
+       }
+}
+
+/*
+ * Auxiliary function to return a text array out of a list of String nodes.
+ *
+ * Raises an error if the same publication name occurs more than once in
+ * the list.  The per-element text values are built in a short-lived memory
+ * context that is deleted before returning; the array itself is built
+ * after switching back to the caller's context, so it outlives this
+ * function.
+ */
+static Datum
+publicationListToArray(List *publist)
+{
+       ArrayType  *arr;
+       Datum      *datums;
+       int                     j = 0;
+       ListCell   *cell;
+       MemoryContext memcxt;
+       MemoryContext oldcxt;
+
+       /* Create memory context for temporary allocations. */
+       memcxt = AllocSetContextCreate(CurrentMemoryContext,
+                                      "publicationListToArray to array",
+                                      ALLOCSET_DEFAULT_MINSIZE,
+                                      ALLOCSET_DEFAULT_INITSIZE,
+                                      ALLOCSET_DEFAULT_MAXSIZE);
+       oldcxt = MemoryContextSwitchTo(memcxt);
+
+       /* The array holds Datums, so size it by Datum, not by pointer type. */
+       datums = palloc(sizeof(Datum) * list_length(publist));
+       foreach(cell, publist)
+       {
+               char       *name = strVal(lfirst(cell));
+               ListCell   *pcell;
+
+               /*
+                * Check for duplicates: compare this name against every entry
+                * that precedes it, stopping once we reach the entry itself.
+                */
+               foreach(pcell, publist)
+               {
+                       char       *pname = strVal(lfirst(pcell));
+
+                       if (pcell == cell)
+                               break;
+
+                       if (strcmp(name, pname) == 0)
+                               ereport(ERROR,
+                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("publication name \"%s\" used more than once",
+                                               pname)));
+               }
+
+               datums[j++] = CStringGetTextDatum(name);
+       }
+
+       /* Build the result in the caller's context, then drop the scratch one. */
+       MemoryContextSwitchTo(oldcxt);
+
+       arr = construct_array(datums, list_length(publist),
+                             TEXTOID, -1, false, 'i');
+       MemoryContextDelete(memcxt);
+
+       return PointerGetDatum(arr);
+}
+
+/*
+ * Create new subscription.
+ *
+ * Only superusers may call this.  The subscription name must not already
+ * be in use in the current database.  The connection string and the
+ * publication list are taken from the statement itself; the remaining
+ * settings come from the option list.  After the catalog row is inserted,
+ * a replication origin named "pg_<subscription OID>" is created and,
+ * unless slot creation was turned off, a replication slot is created on
+ * the publisher.
+ *
+ * Returns the object address of the new subscription.
+ */
+ObjectAddress
+CreateSubscription(CreateSubscriptionStmt *stmt)
+{
+       Relation        rel;
+       ObjectAddress myself;
+       Oid                     subid;
+       bool            nulls[Natts_pg_subscription];
+       Datum           values[Natts_pg_subscription];
+       HeapTuple       tup;
+       bool            enabled_given;
+       bool            enabled;
+       char       *conninfo;
+       char       *slotname;
+       char            originname[NAMEDATALEN];
+       bool            create_slot;
+       List       *publications;
+
+       if (!superuser())
+               ereport(ERROR,
+                               (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                                (errmsg("must be superuser to create subscriptions"))));
+
+       rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+       /* Check if name is used */
+       subid = GetSysCacheOid2(SUBSCRIPTIONNAME, MyDatabaseId,
+                                                       CStringGetDatum(stmt->subname));
+       if (OidIsValid(subid))
+       {
+               ereport(ERROR,
+                               (errcode(ERRCODE_DUPLICATE_OBJECT),
+                                errmsg("subscription \"%s\" already exists",
+                                               stmt->subname)));
+       }
+
+       /*
+        * Parse and check options.
+        *
+        * NULL is passed for the conninfo and publications outputs, so the
+        * parser rejects those as options; both are taken from dedicated
+        * statement fields below.  enabled_given is passed because the parser
+        * writes it alongside enabled; it is not consulted afterwards.
+        */
+       parse_subscription_options(stmt->options, NULL, NULL,
+                                                          &enabled_given, &enabled,
+                                                          &create_slot, &slotname);
+       /* Without an explicit slot name, the subscription name is used. */
+       if (slotname == NULL)
+               slotname = stmt->subname;
+
+       conninfo = stmt->conninfo;
+       publications = stmt->publication;
+
+       /* Load the library providing us libpq calls. */
+       load_file("libpqwalreceiver", false);
+
+       /* Check the connection info string. */
+       walrcv_check_conninfo(conninfo);
+
+       /* Everything ok, form a new tuple. */
+       memset(values, 0, sizeof(values));
+       memset(nulls, false, sizeof(nulls));
+
+       values[Anum_pg_subscription_subdbid - 1] = ObjectIdGetDatum(MyDatabaseId);
+       values[Anum_pg_subscription_subname - 1] =
+               DirectFunctionCall1(namein, CStringGetDatum(stmt->subname));
+       values[Anum_pg_subscription_subowner - 1] = ObjectIdGetDatum(GetUserId());
+       values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(enabled);
+       values[Anum_pg_subscription_subconninfo - 1] =
+               CStringGetTextDatum(conninfo);
+       values[Anum_pg_subscription_subslotname - 1] =
+               DirectFunctionCall1(namein, CStringGetDatum(slotname));
+       /* This call also rejects a publication list with duplicate names. */
+       values[Anum_pg_subscription_subpublications - 1] =
+                publicationListToArray(publications);
+
+       tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+
+       /* Insert tuple into catalog. */
+       subid = simple_heap_insert(rel, tup);
+       CatalogUpdateIndexes(rel, tup);
+       heap_freetuple(tup);
+
+       /* The replication origin is named after the subscription's OID. */
+       snprintf(originname, sizeof(originname), "pg_%u", subid);
+       replorigin_create(originname);
+
+       /*
+        * If requested, create the replication slot on remote side for our
+        * newly created subscription.
+        */
+       if (create_slot)
+       {
+               XLogRecPtr                      lsn;
+               char                       *err;
+               WalReceiverConn    *wrconn;
+
+               /* Try to connect to the publisher. */
+               wrconn = walrcv_connect(conninfo, true, stmt->subname, &err);
+               if (!wrconn)
+                       ereport(ERROR,
+                                       (errmsg("could not connect to the publisher: %s", err)));
+
+               /* lsn is filled in by the call but not used here. */
+               walrcv_create_slot(wrconn, slotname, false, &lsn);
+               ereport(NOTICE,
+                               (errmsg("created replication slot \"%s\" on publisher",
+                                               slotname)));
+
+               /* And we are done with the remote side. */
+               walrcv_disconnect(wrconn);
+       }
+
+       heap_close(rel, RowExclusiveLock);
+
+       /* Ask for the apply launcher to be woken up at commit. */
+       ApplyLauncherWakeupAtCommit();
+
+       ObjectAddressSet(myself, SubscriptionRelationId, subid);
+
+       InvokeObjectPostCreateHook(SubscriptionRelationId, subid, 0);
+
+       return myself;
+}
+
+/*
+ * Alter the existing subscription.
+ *
+ * Looks the subscription up by name in the current database, checks that
+ * the current user passes the subscription owner check, and rewrites only
+ * those catalog columns for which an option was actually given (enabled
+ * state, connection string, slot name, publication list).
+ *
+ * Returns the object address of the subscription.
+ */
+ObjectAddress
+AlterSubscription(AlterSubscriptionStmt *stmt)
+{
+       Relation        rel;
+       ObjectAddress myself;
+       bool            nulls[Natts_pg_subscription];
+       bool            replaces[Natts_pg_subscription];
+       Datum           values[Natts_pg_subscription];
+       HeapTuple       tup;
+       Oid                     subid;
+       bool            enabled_given;
+       bool            enabled;
+       char       *conninfo;
+       char       *slot_name;
+       List       *publications;
+
+       rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+       /* Fetch the existing tuple. */
+       tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId,
+                                                         CStringGetDatum(stmt->subname));
+
+       if (!HeapTupleIsValid(tup))
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                errmsg("subscription \"%s\" does not exist",
+                                               stmt->subname)));
+
+       /* must be owner */
+       if (!pg_subscription_ownercheck(HeapTupleGetOid(tup), GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION,
+                                          stmt->subname);
+
+       subid = HeapTupleGetOid(tup);
+
+       /*
+        * Parse options.  NULL is passed for the create_slot output, so the
+        * slot-creation options are rejected by the parser here.
+        */
+       parse_subscription_options(stmt->options, &conninfo, &publications,
+                                                          &enabled_given, &enabled,
+                                                          NULL, &slot_name);
+
+       /* Form a new tuple. */
+       memset(values, 0, sizeof(values));
+       memset(nulls, false, sizeof(nulls));
+       memset(replaces, false, sizeof(replaces));
+
+       /* Mark a column for replacement only when its option was supplied. */
+       if (enabled_given)
+       {
+               values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(enabled);
+               replaces[Anum_pg_subscription_subenabled - 1] = true;
+       }
+       if (conninfo)
+       {
+               values[Anum_pg_subscription_subconninfo - 1] =
+                       CStringGetTextDatum(conninfo);
+               replaces[Anum_pg_subscription_subconninfo - 1] = true;
+       }
+       if (slot_name)
+       {
+               values[Anum_pg_subscription_subslotname - 1] =
+                       DirectFunctionCall1(namein, CStringGetDatum(slot_name));
+               replaces[Anum_pg_subscription_subslotname - 1] = true;
+       }
+       if (publications != NIL)
+       {
+               /* This call also rejects a list with duplicate names. */
+               values[Anum_pg_subscription_subpublications - 1] =
+                        publicationListToArray(publications);
+               replaces[Anum_pg_subscription_subpublications - 1] = true;
+       }
+
+       /*
+        * NOTE(review): tup is reassigned here, so the copy obtained from
+        * SearchSysCacheCopy2 is not freed explicitly afterwards; presumably
+        * that is left to memory context cleanup -- confirm.
+        */
+       tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
+                                                       replaces);
+
+       /* Update the catalog. */
+       simple_heap_update(rel, &tup->t_self, tup);
+       CatalogUpdateIndexes(rel, tup);
+
+       ObjectAddressSet(myself, SubscriptionRelationId, subid);
+
+       /* Cleanup. */
+       heap_freetuple(tup);
+       heap_close(rel, RowExclusiveLock);
+
+       InvokeObjectPostAlterHook(SubscriptionRelationId, subid, 0);
+
+       return myself;
+}
+
+/*
+ * Drop a subscription
+ *
+ * Looks the subscription up by name in the current database.  If it does
+ * not exist, either raises an error or, with missing_ok, emits a NOTICE
+ * and returns.  Otherwise the current user must pass the subscription
+ * owner check; the catalog row is deleted, the apply worker is stopped and
+ * the replication origin "pg_<subscription OID>" is dropped if present.
+ * Unless the statement asked to keep it, the replication slot is then
+ * dropped on the publisher over a replication connection.
+ */
+void
+DropSubscription(DropSubscriptionStmt *stmt)
+{
+       Relation        rel;
+       ObjectAddress myself;
+       HeapTuple       tup;
+       Oid                     subid;
+       Datum           datum;
+       bool            isnull;
+       char       *subname;
+       char       *conninfo;
+       char       *slotname;
+       char            originname[NAMEDATALEN];
+       char       *err = NULL;
+       RepOriginId     originid;
+       WalReceiverConn    *wrconn = NULL;
+       StringInfoData          cmd;
+
+       rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+       tup = SearchSysCache2(SUBSCRIPTIONNAME, MyDatabaseId,
+                                                 CStringGetDatum(stmt->subname));
+
+       if (!HeapTupleIsValid(tup))
+       {
+               heap_close(rel, NoLock);
+
+               if (!stmt->missing_ok)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                                        errmsg("subscription \"%s\" does not exist",
+                                                       stmt->subname)));
+               else
+                       ereport(NOTICE,
+                                       (errmsg("subscription \"%s\" does not exist, skipping",
+                                                       stmt->subname)));
+
+               return;
+       }
+
+       subid = HeapTupleGetOid(tup);
+
+       /* must be owner */
+       if (!pg_subscription_ownercheck(subid, GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION,
+                                          stmt->subname);
+
+       /* DROP hook for the subscription being removed */
+       InvokeObjectDropHook(SubscriptionRelationId, subid, 0);
+
+       /*
+        * Lock the subscription so nobody else can do anything with it
+        * (including the replication workers).
+        */
+       LockSharedObject(SubscriptionRelationId, subid, 0, AccessExclusiveLock);
+
+       /*
+        * Copy out everything we still need from the tuple, since it is
+        * released right after the catalog row is deleted below.
+        */
+
+       /* Get subname */
+       datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup,
+                                                       Anum_pg_subscription_subname, &isnull);
+       Assert(!isnull);
+       subname = pstrdup(NameStr(*DatumGetName(datum)));
+
+       /* Get conninfo */
+       datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup,
+                                                       Anum_pg_subscription_subconninfo, &isnull);
+       Assert(!isnull);
+       conninfo = pstrdup(TextDatumGetCString(datum));
+
+       /* Get slotname */
+       datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup,
+                                                       Anum_pg_subscription_subslotname, &isnull);
+       Assert(!isnull);
+       slotname = pstrdup(NameStr(*DatumGetName(datum)));
+
+       ObjectAddressSet(myself, SubscriptionRelationId, subid);
+       EventTriggerSQLDropAddObject(&myself, true, true);
+
+       /* Remove the tuple from catalog. */
+       simple_heap_delete(rel, &tup->t_self);
+
+       ReleaseSysCache(tup);
+
+       /*
+        * Protect against launcher restarting the worker.
+        *
+        * NOTE(review): no matching LWLockRelease is visible in this function;
+        * presumably the lock is meant to be held until the transaction ends
+        * -- confirm.
+        */
+       LWLockAcquire(LogicalRepLauncherLock, LW_EXCLUSIVE);
+
+       /* Kill the apply worker so that the slot becomes accessible. */
+       logicalrep_worker_stop(subid);
+
+       /* Remove the origin tracking if exists. */
+       snprintf(originname, sizeof(originname), "pg_%u", subid);
+       originid = replorigin_by_name(originname, true);
+       if (originid != InvalidRepOriginId)
+               replorigin_drop(originid);
+
+       /* If the user asked to not drop the slot, we are done now. */
+       if (!stmt->drop_slot)
+       {
+               heap_close(rel, NoLock);
+               return;
+       }
+
+       /*
+        * Otherwise drop the replication slot at the publisher node using
+        * the replication connection.
+        */
+       load_file("libpqwalreceiver", false);
+
+       initStringInfo(&cmd);
+       appendStringInfo(&cmd, "DROP_REPLICATION_SLOT \"%s\"", slotname);
+
+       wrconn = walrcv_connect(conninfo, true, subname, &err);
+       if (wrconn == NULL)
+               ereport(ERROR,
+                               (errmsg("could not connect to publisher when attempting to "
+                                               "drop the replication slot \"%s\"", slotname),
+                                errdetail("The error was: %s", err)));
+
+       if (!walrcv_command(wrconn, cmd.data, &err))
+               ereport(ERROR,
+                               (errmsg("could not drop the replication slot \"%s\" on publisher",
+                                               slotname),
+                                errdetail("The error was: %s", err)));
+       else
+               ereport(NOTICE,
+                               (errmsg("dropped replication slot \"%s\" on publisher",
+                                               slotname)));
+
+       walrcv_disconnect(wrconn);
+
+       pfree(cmd.data);
+
+       heap_close(rel, NoLock);
+}
+
+/*
+ * Internal workhorse for changing a subscription owner
+ *
+ * rel is the open pg_subscription catalog and tup a modifiable copy of the
+ * subscription's row.  Nothing happens when the subscription already
+ * belongs to newOwnerId.  Otherwise the current user must pass the
+ * subscription owner check and the new owner must be a superuser; the row,
+ * the owner dependency and the post-alter hook are then updated.
+ */
+static void
+AlterSubscriptionOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
+{
+       Form_pg_subscription form;
+
+       form = (Form_pg_subscription) GETSTRUCT(tup);
+
+       /* Already owned by the requested role: nothing to do. */
+       if (form->subowner == newOwnerId)
+               return;
+
+       if (!pg_subscription_ownercheck(HeapTupleGetOid(tup), GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION,
+                                          NameStr(form->subname));
+
+       /* New owner must be a superuser */
+       if (!superuser_arg(newOwnerId))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                                errmsg("permission denied to change owner of subscription \"%s\"",
+                                               NameStr(form->subname)),
+                                errhint("The owner of a subscription must be a superuser.")));
+
+       /* Store the new owner in the row and write it back. */
+       form->subowner = newOwnerId;
+       simple_heap_update(rel, &tup->t_self, tup);
+       CatalogUpdateIndexes(rel, tup);
+
+       /* Update owner dependency reference */
+       changeDependencyOnOwner(SubscriptionRelationId,
+                                                       HeapTupleGetOid(tup),
+                                                       newOwnerId);
+
+       InvokeObjectPostAlterHook(SubscriptionRelationId,
+                                                         HeapTupleGetOid(tup), 0);
+}
+
+/*
+ * Change the owner of the subscription with the given name in the current
+ * database.
+ *
+ * Errors out if no such subscription exists.  Returns the object address
+ * of the subscription.
+ */
+ObjectAddress
+AlterSubscriptionOwner(const char *name, Oid newOwnerId)
+{
+       Relation        subcatalog;
+       HeapTuple       subtuple;
+       ObjectAddress   result;
+
+       subcatalog = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+       subtuple = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId,
+                                      CStringGetDatum(name));
+       if (!HeapTupleIsValid(subtuple))
+               ereport(ERROR,
+                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                        errmsg("subscription \"%s\" does not exist", name)));
+
+       /* Build the return value from the row's OID before changing it. */
+       ObjectAddressSet(result, SubscriptionRelationId,
+                        HeapTupleGetOid(subtuple));
+
+       AlterSubscriptionOwner_internal(subcatalog, subtuple, newOwnerId);
+
+       heap_freetuple(subtuple);
+       heap_close(subcatalog, RowExclusiveLock);
+
+       return result;
+}
+
+/*
+ * Change the owner of the subscription with the given OID.
+ *
+ * Errors out if no such subscription exists.
+ */
+void
+AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId)
+{
+       Relation        subcatalog = heap_open(SubscriptionRelationId,
+                                              RowExclusiveLock);
+       HeapTuple       subtuple = SearchSysCacheCopy1(SUBSCRIPTIONOID,
+                                                      ObjectIdGetDatum(subid));
+
+       if (!HeapTupleIsValid(subtuple))
+               ereport(ERROR,
+                       (errcode(ERRCODE_UNDEFINED_OBJECT),
+                        errmsg("subscription with OID %u does not exist", subid)));
+
+       AlterSubscriptionOwner_internal(subcatalog, subtuple, newOwnerId);
+
+       heap_freetuple(subtuple);
+       heap_close(subcatalog, RowExclusiveLock);
+}
index 6ed2a3dc4d12f493e281b55bdc35a65124b77624..c4b0011bdd61e3e0589fb35a559090e4f7342987 100644 (file)
@@ -12055,6 +12055,18 @@ ATPrepChangePersistence(Relation rel, bool toLogged)
                        break;
        }
 
+       /*
+        * Check that the table is not part of any publication when changing to
+        * UNLOGGED as UNLOGGED tables can't be published.
+        */
+       if (!toLogged &&
+               list_length(GetRelationPublications(RelationGetRelid(rel))) > 0)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("cannot change table \"%s\" to unlogged because it is part of a publication",
+                                               RelationGetRelationName(rel)),
+                                errdetail("Unlogged relations cannot be replicated.")));
+
        /*
         * Check existing foreign key constraints to preserve the invariant that
         * permanent tables cannot reference unlogged ones.  Self-referencing
index c51415830ae2ee82ebb6d247daa5456430281cc2..2a2b7eb9bd94c141eb721e7bdc5dfffe4fe2e240 100644 (file)
@@ -14,7 +14,7 @@ include $(top_builddir)/src/Makefile.global
 
 OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \
        execMain.o execParallel.o execProcnode.o execQual.o \
-       execScan.o execTuples.o \
+       execReplication.o execScan.o execTuples.o \
        execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \
        nodeBitmapAnd.o nodeBitmapOr.o \
        nodeBitmapHeapscan.o nodeBitmapIndexscan.o \
index e6edcc06c2393e58d6eb8c36339974e43c5940fb..0bc146ca47df06bfbcab8fc1e0c7c5e3b0da94af 100644 (file)
@@ -43,6 +43,7 @@
 #include "access/xact.h"
 #include "catalog/namespace.h"
 #include "catalog/partition.h"
+#include "catalog/pg_publication.h"
 #include "commands/matview.h"
 #include "commands/trigger.h"
 #include "executor/execdebug.h"
@@ -1024,7 +1025,7 @@ CheckValidResultRel(Relation resultRel, CmdType operation)
        {
                case RELKIND_RELATION:
                case RELKIND_PARTITIONED_TABLE:
-                       /* OK */
+                       CheckCmdReplicaIdentity(resultRel, operation);
                        break;
                case RELKIND_SEQUENCE:
                        ereport(ERROR,
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
new file mode 100644 (file)
index 0000000..a18ae51
--- /dev/null
@@ -0,0 +1,553 @@
+/*-------------------------------------------------------------------------
+ *
+ * execReplication.c
+ *       miscellaneous executor routines for logical replication
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/backend/executor/execReplication.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "commands/trigger.h"
+#include "executor/executor.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/parse_relation.h"
+#include "parser/parsetree.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/tqual.h"
+
+
+/*
+ * Fill 'skey' with one equality scan key per column of 'idxrel', taking the
+ * comparison values from 'searchslot'.
+ *
+ * The slot is laid out like 'rel' (*NOT* like idxrel), so each index column
+ * is mapped back to its heap attribute number through the index's indkey
+ * vector before the value is fetched.
+ *
+ * Returns true if at least one of the key values is NULL.
+ *
+ * This is not a general-purpose routine: idxrel is expected to be the
+ * replica identity index of rel and to meet all limitations that come with
+ * that.
+ */
+static bool
+build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
+                                                TupleTableSlot *searchslot)
+{
+       int2vector *indkey = &idxrel->rd_index->indkey;
+       int                     nkeys = RelationGetNumberOfAttributes(idxrel);
+       bool            sawnull = false;
+       bool            classisnull;
+       Datum           classdatum;
+       oidvector  *opclasses;
+       int                     keyidx;
+
+       Assert(RelationGetReplicaIndex(rel) == RelationGetRelid(idxrel));
+
+       /* Fetch the operator classes of the index columns. */
+       classdatum = SysCacheGetAttr(INDEXRELID, idxrel->rd_indextuple,
+                                                                Anum_pg_index_indclass, &classisnull);
+       Assert(!classisnull);
+       opclasses = (oidvector *) DatumGetPointer(classdatum);
+
+       for (keyidx = 0; keyidx < nkeys; keyidx++)
+       {
+               Oid                     opclassoid = opclasses->values[keyidx];
+               int                     heapattidx = indkey->values[keyidx] - 1;
+               Oid                     intype;
+               Oid                     family;
+               Oid                     eqop;
+
+               /*
+                * Look up the equality operator of the column's operator class;
+                * its underlying function is what the scan key compares with.
+                */
+               intype = get_opclass_input_type(opclassoid);
+               family = get_opclass_family(opclassoid);
+               eqop = get_opfamily_member(family, intype, intype,
+                                                                  BTEqualStrategyNumber);
+               if (!OidIsValid(eqop))
+                       elog(ERROR, "could not find member %d(%u,%u) of opfamily %u",
+                                BTEqualStrategyNumber, intype, intype, family);
+
+               /* Scan key attribute numbers are 1-based index columns. */
+               ScanKeyInit(&skey[keyidx],
+                                       keyidx + 1,
+                                       BTEqualStrategyNumber,
+                                       get_opcode(eqop),
+                                       searchslot->tts_values[heapattidx]);
+
+               /* Flag NULL search values on the key and for the caller. */
+               if (searchslot->tts_isnull[heapattidx])
+               {
+                       sawnull = true;
+                       skey[keyidx].sk_flags |= SK_ISNULL;
+               }
+       }
+
+       return sawnull;
+}
+
+/*
+ * Search the relation 'rel' for a tuple matching 'searchslot', using the
+ * index 'idxoid'.
+ *
+ * The scan key is built by build_replindex_scan_key(), so idxoid has to be
+ * the replica identity index of rel.  Only the first tuple returned by the
+ * index scan is considered.
+ *
+ * If a matching tuple is found, lock it with lockmode, fill 'outslot' with
+ * its contents, and return true.  Return false otherwise.
+ *
+ * The scan runs under a dirty snapshot.  If, after fetching the tuple,
+ * snap.xmin or snap.xmax is a valid transaction id, we wait for that
+ * transaction and rescan.  A concurrent update detected while locking the
+ * tuple also restarts the search.
+ *
+ * NOTE(review): the "has NULLs" result of build_replindex_scan_key() is
+ * ignored here -- confirm that replica identity columns can never be NULL.
+ */
+bool
+RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
+                                                        LockTupleMode lockmode,
+                                                        TupleTableSlot *searchslot,
+                                                        TupleTableSlot *outslot)
+{
+       HeapTuple               scantuple;
+       ScanKeyData             skey[INDEX_MAX_KEYS];
+       IndexScanDesc   scan;
+       SnapshotData    snap;
+       TransactionId   xwait;
+       Relation                idxrel;
+       bool                    found;
+
+       /* Open the index. */
+       idxrel = index_open(idxoid, RowExclusiveLock);
+
+       /* Start an index scan, with one scan key per index column. */
+       InitDirtySnapshot(snap);
+       scan = index_beginscan(rel, idxrel, &snap,
+                                                  RelationGetNumberOfAttributes(idxrel),
+                                                  0);
+
+       /* Build scan key. */
+       build_replindex_scan_key(skey, rel, idxrel, searchslot);
+
+       /* Both the wait path and the concurrent-update path restart from here. */
+retry:
+       found = false;
+
+       index_rescan(scan, skey, RelationGetNumberOfAttributes(idxrel), NULL, 0);
+
+       /* Try to find the tuple */
+       if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL)
+       {
+               found = true;
+               /* Store the tuple in outslot and materialize the slot. */
+               ExecStoreTuple(scantuple, outslot, InvalidBuffer, false);
+               ExecMaterializeSlot(outslot);
+
+               /*
+                * Pick the transaction to wait for, preferring snap.xmin.
+                * Presumably the dirty snapshot reports an in-progress inserter in
+                * xmin and an in-progress deleter/updater in xmax -- TODO confirm.
+                */
+               xwait = TransactionIdIsValid(snap.xmin) ?
+                       snap.xmin : snap.xmax;
+
+               /*
+                * If the tuple is locked, wait for locking transaction to finish
+                * and retry.
+                */
+               if (TransactionIdIsValid(xwait))
+               {
+                       XactLockTableWait(xwait, NULL, NULL, XLTW_None);
+                       goto retry;
+               }
+       }
+
+       /* Found tuple, try to lock it in the lockmode. */
+       if (found)
+       {
+               Buffer buf;
+               HeapUpdateFailureData hufd;
+               HTSU_Result res;
+               HeapTupleData locktup;
+
+               /* heap_lock_tuple() is handed only the TID of the tuple we found. */
+               ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self);
+
+               PushActiveSnapshot(GetLatestSnapshot());
+
+               /*
+                * NOTE(review): the argument commented "wait" looks like a wait
+                * policy rather than a boolean -- confirm that passing false
+                * means "block until the lock is available".
+                */
+               res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
+                                                         lockmode,
+                                                         false /* wait */,
+                                                         false /* don't follow updates */,
+                                                         &buf, &hufd);
+               /* the tuple slot already has the buffer pinned */
+               ReleaseBuffer(buf);
+
+               PopActiveSnapshot();
+
+               switch (res)
+               {
+                       case HeapTupleMayBeUpdated:
+                               /* Lock acquired. */
+                               break;
+                       case HeapTupleUpdated:
+                               /* XXX: Improve handling here */
+                               ereport(LOG,
+                                               (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                                                errmsg("concurrent update, retrying")));
+                               goto retry;
+                       case HeapTupleInvisible:
+                               /* elog(ERROR) does not return, so this does not fall through. */
+                               elog(ERROR, "attempted to lock invisible tuple");
+                       default:
+                               elog(ERROR, "unexpected heap_lock_tuple status: %u", res);
+                               break;
+               }
+       }
+
+       index_endscan(scan);
+
+       /* Don't release lock until commit. */
+       index_close(idxrel, NoLock);
+
+       return found;
+}
+
+/*
+ * Report whether heap tuple 'tup' and 'slot' carry equal values in every
+ * attribute described by 'desc'.
+ *
+ * Non-NULL values are compared with datumIsEqual(), i.e. at the binary
+ * level.  That might return false negatives, but it is the best we can do
+ * here: a data type may have several notions of equality and table columns
+ * do not say which one to use.
+ */
+static bool
+tuple_equals_slot(TupleDesc desc, HeapTuple tup, TupleTableSlot *slot)
+{
+       Datum           tupvalues[MaxTupleAttributeNumber];
+       bool            tupnulls[MaxTupleAttributeNumber];
+       int                     i;
+
+       heap_deform_tuple(tup, desc, tupvalues, tupnulls);
+
+       for (i = 0; i < desc->natts; i++)
+       {
+               /* NULL on exactly one side can never be a match. */
+               if (tupnulls[i] != slot->tts_isnull[i])
+                       return false;
+
+               /* Two NULLs count as equal; everything else is compared bytewise. */
+               if (!tupnulls[i] &&
+                       !datumIsEqual(tupvalues[i], slot->tts_values[i],
+                                                 desc->attrs[i]->attbyval,
+                                                 desc->attrs[i]->attlen))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Search the relation 'rel' for a tuple equal to 'searchslot' using a
+ * sequential scan.
+ *
+ * Candidates are compared with tuple_equals_slot(), so every attribute has
+ * to match at the binary level.
+ *
+ * If a matching tuple is found, lock it with lockmode, fill 'outslot' with
+ * its contents, and return true.  Return false otherwise.
+ *
+ * Note that this stops on the first matching tuple.
+ *
+ * The scan runs under a dirty snapshot.  If, after a match, snap.xmin or
+ * snap.xmax is a valid transaction id, we wait for that transaction and
+ * restart the scan.  A concurrent update detected while locking the tuple
+ * also restarts the search.
+ *
+ * This can obviously be quite slow on tables that have more than a few rows.
+ */
+bool
+RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
+                                                TupleTableSlot *searchslot, TupleTableSlot *outslot)
+{
+       HeapTuple               scantuple;
+       HeapScanDesc    scan;
+       SnapshotData    snap;
+       TransactionId   xwait;
+       bool                    found;
+       TupleDesc               desc = RelationGetDescr(rel);
+
+       Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor));
+
+       /* Start a heap scan. */
+       InitDirtySnapshot(snap);
+       scan = heap_beginscan(rel, &snap, 0, NULL);
+
+       /* Both the wait path and the concurrent-update path restart from here. */
+retry:
+       found = false;
+
+       heap_rescan(scan, NULL);
+
+       /* Try to find the tuple */
+       while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+       {
+               if (!tuple_equals_slot(desc, scantuple, searchslot))
+                       continue;
+
+               found = true;
+               ExecStoreTuple(scantuple, outslot, InvalidBuffer, false);
+               ExecMaterializeSlot(outslot);
+
+               xwait = TransactionIdIsValid(snap.xmin) ?
+                       snap.xmin : snap.xmax;
+
+               /*
+                * If the tuple is locked, wait for locking transaction to finish
+                * and retry.
+                */
+               if (TransactionIdIsValid(xwait))
+               {
+                       XactLockTableWait(xwait, NULL, NULL, XLTW_None);
+                       goto retry;
+               }
+
+               /*
+                * Found our tuple and nobody else is working on it.  Stop here:
+                * without this the scan would read the rest of the heap and a
+                * later duplicate would overwrite outslot.
+                */
+               break;
+       }
+
+       /* Found tuple, try to lock it in the lockmode. */
+       if (found)
+       {
+               Buffer buf;
+               HeapUpdateFailureData hufd;
+               HTSU_Result res;
+               HeapTupleData locktup;
+
+               /* heap_lock_tuple() is handed only the TID of the tuple we found. */
+               ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self);
+
+               PushActiveSnapshot(GetLatestSnapshot());
+
+               res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
+                                                         lockmode,
+                                                         false /* wait */,
+                                                         false /* don't follow updates */,
+                                                         &buf, &hufd);
+               /* the tuple slot already has the buffer pinned */
+               ReleaseBuffer(buf);
+
+               PopActiveSnapshot();
+
+               switch (res)
+               {
+                       case HeapTupleMayBeUpdated:
+                               break;
+                       case HeapTupleUpdated:
+                               /* XXX: Improve handling here */
+                               ereport(LOG,
+                                               (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                                                errmsg("concurrent update, retrying")));
+                               goto retry;
+                       case HeapTupleInvisible:
+                               /* elog(ERROR) does not return, so this does not fall through. */
+                               elog(ERROR, "attempted to lock invisible tuple");
+                       default:
+                               elog(ERROR, "unexpected heap_lock_tuple status: %u", res);
+                               break;
+               }
+       }
+
+       heap_endscan(scan);
+
+       return found;
+}
+
+/*
+ * Insert the tuple held in 'slot' into the result relation of 'estate'.
+ *
+ * Runs the BEFORE ROW INSERT triggers (which may replace the slot or, by
+ * returning NULL, suppress the insert), checks the relation's constraints,
+ * stores the tuple, adds index entries and finally runs the AFTER ROW
+ * INSERT triggers.
+ *
+ * Caller is responsible for opening the indexes.
+ */
+void
+ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
+{
+       ResultRelInfo  *relinfo = estate->es_result_relation_info;
+       Relation                targetrel = relinfo->ri_RelationDesc;
+       HeapTuple               newtup;
+       List       *recheck = NIL;
+
+       /* Only plain tables are supported for now. */
+       Assert(targetrel->rd_rel->relkind == RELKIND_RELATION);
+
+       CheckCmdReplicaIdentity(targetrel, CMD_INSERT);
+
+       /* BEFORE ROW INSERT triggers */
+       if (relinfo->ri_TrigDesc != NULL &&
+               relinfo->ri_TrigDesc->trig_insert_before_row)
+       {
+               slot = ExecBRInsertTriggers(estate, relinfo, slot);
+               if (slot == NULL)
+                       return;                 /* "do nothing" */
+       }
+
+       /* Constraint checks, if the relation has any. */
+       if (targetrel->rd_att->constr)
+               ExecConstraints(relinfo, slot, slot, estate);
+
+       /* Turn the slot into a tuple that we can insert, and store it. */
+       newtup = ExecMaterializeSlot(slot);
+       simple_heap_insert(targetrel, newtup);
+
+       /* Create index entries for the new tuple. */
+       if (relinfo->ri_NumIndices > 0)
+               recheck = ExecInsertIndexTuples(slot, &(newtup->t_self),
+                                                                               estate, false, NULL,
+                                                                               NIL);
+
+       /* AFTER ROW INSERT triggers */
+       ExecARInsertTriggers(estate, relinfo, newtup, recheck);
+
+       list_free(recheck);
+}
+
+/*
+ * Replace the tuple identified by 'searchslot' with the contents of 'slot'.
+ *
+ * Runs the BEFORE ROW UPDATE triggers (which may replace the slot or, by
+ * returning NULL, suppress the update), checks the relation's constraints,
+ * updates the heap tuple, adds index entries unless the new tuple is
+ * heap-only, and finally runs the AFTER ROW UPDATE triggers.
+ *
+ * Caller is responsible for opening the indexes.
+ */
+void
+ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
+                                                TupleTableSlot *searchslot, TupleTableSlot *slot)
+{
+       ResultRelInfo  *relinfo = estate->es_result_relation_info;
+       Relation                targetrel = relinfo->ri_RelationDesc;
+       HeapTuple               newtup;
+       List       *recheck = NIL;
+
+       /* Only plain tables are supported for now. */
+       Assert(targetrel->rd_rel->relkind == RELKIND_RELATION);
+
+       CheckCmdReplicaIdentity(targetrel, CMD_UPDATE);
+
+       /* BEFORE ROW UPDATE triggers */
+       if (relinfo->ri_TrigDesc != NULL &&
+               relinfo->ri_TrigDesc->trig_update_before_row)
+       {
+               slot = ExecBRUpdateTriggers(estate, epqstate, relinfo,
+                                                                       &searchslot->tts_tuple->t_self,
+                                                                       NULL, slot);
+               if (slot == NULL)
+                       return;                 /* "do nothing" */
+       }
+
+       /* Constraint checks, if the relation has any. */
+       if (targetrel->rd_att->constr)
+               ExecConstraints(relinfo, slot, slot, estate);
+
+       /* Turn the slot into a tuple that we can write. */
+       newtup = ExecMaterializeSlot(slot);
+
+       /* Overwrite the old tuple version. */
+       simple_heap_update(targetrel, &searchslot->tts_tuple->t_self,
+                                          slot->tts_tuple);
+
+       /* Heap-only tuples get no new index entries. */
+       if (relinfo->ri_NumIndices > 0 &&
+               !HeapTupleIsHeapOnly(slot->tts_tuple))
+               recheck = ExecInsertIndexTuples(slot, &(newtup->t_self),
+                                                                               estate, false, NULL,
+                                                                               NIL);
+
+       /* AFTER ROW UPDATE triggers */
+       ExecARUpdateTriggers(estate, relinfo,
+                                                &searchslot->tts_tuple->t_self,
+                                                NULL, newtup, recheck);
+
+       list_free(recheck);
+}
+
+/*
+ * Delete the tuple identified by 'searchslot' from the result relation of
+ * 'estate' and execute the per-row DELETE triggers.
+ *
+ * The BEFORE ROW DELETE triggers run first and may suppress the delete; in
+ * that case neither the heap delete nor the AFTER ROW DELETE triggers
+ * happen.
+ *
+ * Caller is responsible for opening the indexes.
+ */
+void
+ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
+                                                TupleTableSlot *searchslot)
+{
+       bool                    skip_tuple = false;
+       ResultRelInfo  *resultRelInfo = estate->es_result_relation_info;
+       Relation                rel = resultRelInfo->ri_RelationDesc;
+
+       /* For now we support only tables. */
+       Assert(rel->rd_rel->relkind == RELKIND_RELATION);
+
+       CheckCmdReplicaIdentity(rel, CMD_DELETE);
+
+       /*
+        * BEFORE ROW DELETE Triggers
+        *
+        * This must look at the delete flag of the trigger descriptor; testing
+        * the update flag would skip delete triggers on tables that have no
+        * BEFORE ROW UPDATE trigger.
+        */
+       if (resultRelInfo->ri_TrigDesc &&
+               resultRelInfo->ri_TrigDesc->trig_delete_before_row)
+       {
+               skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
+                                                                                  &searchslot->tts_tuple->t_self,
+                                                                                  NULL);
+       }
+
+       if (!skip_tuple)
+       {
+               /* OK, delete the tuple */
+               simple_heap_delete(rel, &searchslot->tts_tuple->t_self);
+
+               /* AFTER ROW DELETE Triggers */
+               ExecARDeleteTriggers(estate, resultRelInfo,
+                                                        &searchslot->tts_tuple->t_self, NULL);
+       }
+}
+
+/*
+ * Raise an error if command 'cmd' cannot be executed on 'rel' with its
+ * current replica identity.
+ *
+ * Only UPDATE and DELETE are checked.  They are rejected when the relation
+ * has neither REPLICA IDENTITY FULL nor a replica identity index and the
+ * relation's publication actions include the command in question.
+ */
+void
+CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
+{
+       PublicationActions *actions;
+       bool            is_update = (cmd == CMD_UPDATE);
+       bool            is_delete = (cmd == CMD_DELETE);
+
+       /* Nothing to verify for other commands. */
+       if (!is_update && !is_delete)
+               return;
+
+       /* Any usable replica identity makes the command acceptable. */
+       if (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+               return;
+       if (OidIsValid(RelationGetReplicaIndex(rel)))
+               return;
+
+       /*
+        * No replica identity: the command is only allowed if the table does
+        * not publish this kind of change.
+        */
+       actions = GetRelationPublicationActions(rel);
+
+       if (is_update)
+       {
+               if (actions->pubupdate)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                        errmsg("cannot update table \"%s\" because it does not have replica identity and publishes updates",
+                                                       RelationGetRelationName(rel)),
+                                        errhint("To enable updating the table, set REPLICA IDENTITY using ALTER TABLE.")));
+       }
+       else if (actions->pubdelete)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("cannot delete from table \"%s\" because it does not have replica identity and publishes deletes",
+                                               RelationGetRelationName(rel)),
+                                errhint("To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.")));
+}
index f871e9d4bbf5b8c38c909551d868de735c01124d..30d733e57a002d23f45ccf653b0bca3962991f6a 100644 (file)
@@ -4286,6 +4286,69 @@ _copyPartitionCmd(const PartitionCmd *from)
        return newnode;
 }
 
+/*
+ * Copy a CreatePublicationStmt node.
+ *
+ * Allocates a new node with makeNode() and copies pubname, options, tables
+ * and for_all_tables from 'from' with the COPY_*_FIELD macro matching each
+ * field's kind.  Returns the new node.
+ */
+static CreatePublicationStmt *
+_copyCreatePublicationStmt(const CreatePublicationStmt *from)
+{
+       CreatePublicationStmt *newnode = makeNode(CreatePublicationStmt);
+
+       COPY_STRING_FIELD(pubname);
+       COPY_NODE_FIELD(options);
+       COPY_NODE_FIELD(tables);
+       COPY_SCALAR_FIELD(for_all_tables);
+
+       return newnode;
+}
+
+/*
+ * Copy an AlterPublicationStmt node.
+ *
+ * Allocates a new node with makeNode() and copies pubname, options, tables,
+ * for_all_tables and tableAction from 'from' with the COPY_*_FIELD macro
+ * matching each field's kind.  Returns the new node.
+ */
+static AlterPublicationStmt *
+_copyAlterPublicationStmt(const AlterPublicationStmt *from)
+{
+       AlterPublicationStmt *newnode = makeNode(AlterPublicationStmt);
+
+       COPY_STRING_FIELD(pubname);
+       COPY_NODE_FIELD(options);
+       COPY_NODE_FIELD(tables);
+       COPY_SCALAR_FIELD(for_all_tables);
+       COPY_SCALAR_FIELD(tableAction);
+
+       return newnode;
+}
+
+/*
+ * Copy a CreateSubscriptionStmt node.
+ *
+ * Allocates a new node with makeNode() and copies subname, conninfo,
+ * publication and options from 'from' with the COPY_*_FIELD macro matching
+ * each field's kind.  Returns the new node.
+ */
+static CreateSubscriptionStmt *
+_copyCreateSubscriptionStmt(const CreateSubscriptionStmt *from)
+{
+       CreateSubscriptionStmt *newnode = makeNode(CreateSubscriptionStmt);
+
+       COPY_STRING_FIELD(subname);
+       COPY_STRING_FIELD(conninfo);
+       COPY_NODE_FIELD(publication);
+       COPY_NODE_FIELD(options);
+
+       return newnode;
+}
+
+/*
+ * Copy an AlterSubscriptionStmt node.
+ *
+ * Allocates a new node with makeNode() and copies subname and options from
+ * 'from'.  Returns the new node.
+ */
+static AlterSubscriptionStmt *
+_copyAlterSubscriptionStmt(const AlterSubscriptionStmt *from)
+{
+       AlterSubscriptionStmt *newnode = makeNode(AlterSubscriptionStmt);
+
+       COPY_STRING_FIELD(subname);
+       COPY_NODE_FIELD(options);
+
+       return newnode;
+}
+
+/*
+ * Copy a DropSubscriptionStmt node.
+ *
+ * Allocates a new node with makeNode() and copies subname, drop_slot and
+ * missing_ok from 'from'.  Returns the new node.
+ */
+static DropSubscriptionStmt *
+_copyDropSubscriptionStmt(const DropSubscriptionStmt *from)
+{
+       DropSubscriptionStmt *newnode = makeNode(DropSubscriptionStmt);
+
+       COPY_STRING_FIELD(subname);
+       COPY_SCALAR_FIELD(drop_slot);
+       COPY_SCALAR_FIELD(missing_ok);
+
+       return newnode;
+}
+
 /* ****************************************************************
  *                                     pg_list.h copy functions
  * ****************************************************************
@@ -5086,6 +5149,21 @@ copyObject(const void *from)
                case T_AlterPolicyStmt:
                        retval = _copyAlterPolicyStmt(from);
                        break;
+               case T_CreatePublicationStmt:
+                       retval = _copyCreatePublicationStmt(from);
+                       break;
+               case T_AlterPublicationStmt:
+                       retval = _copyAlterPublicationStmt(from);
+                       break;
+               case T_CreateSubscriptionStmt:
+                       retval = _copyCreateSubscriptionStmt(from);
+                       break;
+               case T_AlterSubscriptionStmt:
+                       retval = _copyAlterSubscriptionStmt(from);
+                       break;
+               case T_DropSubscriptionStmt:
+                       retval = _copyDropSubscriptionStmt(from);
+                       break;
                case T_A_Expr:
                        retval = _copyAExpr(from);
                        break;
index 78ed3c773e5a7aba7199a078e2451904fa51d32f..55c73b729262012cd66bbd2ef8db77aa7f4c45dc 100644 (file)
@@ -2134,6 +2134,64 @@ _equalAlterTSConfigurationStmt(const AlterTSConfigurationStmt *a,
        return true;
 }
 
+/*
+ * Compare two CreatePublicationStmt nodes field by field (pubname, options,
+ * tables, for_all_tables); returns true when the end is reached.
+ *
+ * NOTE(review): the COMPARE_*_FIELD macros presumably return false from
+ * this function on the first mismatch -- confirm against their definitions.
+ */
+static bool
+_equalCreatePublicationStmt(const CreatePublicationStmt *a,
+                                                       const CreatePublicationStmt *b)
+{
+       COMPARE_STRING_FIELD(pubname);
+       COMPARE_NODE_FIELD(options);
+       COMPARE_NODE_FIELD(tables);
+       COMPARE_SCALAR_FIELD(for_all_tables);
+
+       return true;
+}
+
+/*
+ * Compare two AlterPublicationStmt nodes field by field (pubname, options,
+ * tables, for_all_tables, tableAction); returns true when the end is
+ * reached.
+ *
+ * NOTE(review): the COMPARE_*_FIELD macros presumably return false from
+ * this function on the first mismatch -- confirm against their definitions.
+ */
+static bool
+_equalAlterPublicationStmt(const AlterPublicationStmt *a,
+                                                  const AlterPublicationStmt *b)
+{
+       COMPARE_STRING_FIELD(pubname);
+       COMPARE_NODE_FIELD(options);
+       COMPARE_NODE_FIELD(tables);
+       COMPARE_SCALAR_FIELD(for_all_tables);
+       COMPARE_SCALAR_FIELD(tableAction);
+
+       return true;
+}
+
+/*
+ * Compare two CreateSubscriptionStmt nodes field by field (subname,
+ * conninfo, publication, options); returns true when the end is reached.
+ *
+ * NOTE(review): the COMPARE_*_FIELD macros presumably return false from
+ * this function on the first mismatch -- confirm against their definitions.
+ */
+static bool
+_equalCreateSubscriptionStmt(const CreateSubscriptionStmt *a,
+                                                        const CreateSubscriptionStmt *b)
+{
+       COMPARE_STRING_FIELD(subname);
+       COMPARE_STRING_FIELD(conninfo);
+       COMPARE_NODE_FIELD(publication);
+       COMPARE_NODE_FIELD(options);
+
+       return true;
+}
+
+/*
+ * Compare two AlterSubscriptionStmt nodes field by field (subname,
+ * options); returns true when the end is reached.
+ *
+ * NOTE(review): the COMPARE_*_FIELD macros presumably return false from
+ * this function on the first mismatch -- confirm against their definitions.
+ */
+static bool
+_equalAlterSubscriptionStmt(const AlterSubscriptionStmt *a,
+                                                       const AlterSubscriptionStmt *b)
+{
+       COMPARE_STRING_FIELD(subname);
+       COMPARE_NODE_FIELD(options);
+
+       return true;
+}
+
+/*
+ * Compare two DropSubscriptionStmt nodes field by field (subname,
+ * drop_slot, missing_ok); returns true when the end is reached.
+ *
+ * NOTE(review): the COMPARE_*_FIELD macros presumably return false from
+ * this function on the first mismatch -- confirm against their definitions.
+ */
+static bool
+_equalDropSubscriptionStmt(const DropSubscriptionStmt *a,
+                                                  const DropSubscriptionStmt *b)
+{
+       COMPARE_STRING_FIELD(subname);
+       COMPARE_SCALAR_FIELD(drop_slot);
+       COMPARE_SCALAR_FIELD(missing_ok);
+
+       return true;
+}
+
 static bool
 _equalCreatePolicyStmt(const CreatePolicyStmt *a, const CreatePolicyStmt *b)
 {
@@ -3349,6 +3407,21 @@ equal(const void *a, const void *b)
                case T_AlterPolicyStmt:
                        retval = _equalAlterPolicyStmt(a, b);
                        break;
+               case T_CreatePublicationStmt:
+                       retval = _equalCreatePublicationStmt(a, b);
+                       break;
+               case T_AlterPublicationStmt:
+                       retval = _equalAlterPublicationStmt(a, b);
+                       break;
+               case T_CreateSubscriptionStmt:
+                       retval = _equalCreateSubscriptionStmt(a, b);
+                       break;
+               case T_AlterSubscriptionStmt:
+                       retval = _equalAlterSubscriptionStmt(a, b);
+                       break;
+               case T_DropSubscriptionStmt:
+                       retval = _equalDropSubscriptionStmt(a, b);
+                       break;
                case T_A_Expr:
                        retval = _equalAExpr(a, b);
                        break;
index e61ba06efe59088f358337fae9c5e85f3de58eef..a8e35fecccdcc06d717bb8bbeb2b15b35bd40f55 100644 (file)
@@ -280,6 +280,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
                DropOwnedStmt ReassignOwnedStmt
                AlterTSConfigurationStmt AlterTSDictionaryStmt
                CreateMatViewStmt RefreshMatViewStmt CreateAmStmt
+               CreatePublicationStmt AlterPublicationStmt
+               CreateSubscriptionStmt AlterSubscriptionStmt DropSubscriptionStmt
 
 %type <node>   select_no_parens select_with_parens select_clause
                                simple_select values_clause
@@ -334,6 +336,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
                                database_name access_method_clause access_method attr_name
                                name cursor_name file_name
                                index_name opt_index_name cluster_index_specification
+                               def_key
 
 %type <list>   func_name handler_name qual_Op qual_all_Op subquery_Op
                                opt_class opt_inline_handler opt_validator validator_clause
@@ -391,10 +394,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
                                relation_expr_list dostmt_opt_list
                                transform_element_list transform_type_list
                                TriggerTransitions TriggerReferencing
+                               publication_name_list
 
 %type <list>   group_by_list
 %type <node>   group_by_item empty_grouping_set rollup_clause cube_clause
 %type <node>   grouping_sets_clause
+%type <node>   opt_publication_for_tables publication_for_tables
+%type <value>  publication_name_item
 
 %type <list>   opt_fdw_options fdw_options
 %type <defelt> fdw_option
@@ -407,7 +413,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <fun_param_mode> arg_class
 %type <typnam> func_return func_type
 
-%type <boolean>  opt_trusted opt_restart_seqs
+%type <boolean>  opt_trusted opt_restart_seqs opt_drop_slot
 %type <ival>    OptTemp
 %type <ival>    OptNoLog
 %type <oncommit> OnCommitOption
@@ -647,7 +653,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
        PARALLEL PARSER PARTIAL PARTITION PASSING PASSWORD PLACING PLANS POLICY
        POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY
-       PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROGRAM
+       PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROGRAM PUBLICATION
 
        QUOTE
 
@@ -658,9 +664,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 
        SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SEQUENCES
        SERIALIZABLE SERVER SESSION SESSION_USER SET SETS SETOF SHARE SHOW
-       SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT SOME SQL_P STABLE STANDALONE_P START
-       STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING
-       SYMMETRIC SYSID SYSTEM_P
+       SIMILAR SIMPLE SKIP SLOT SMALLINT SNAPSHOT SOME SQL_P STABLE STANDALONE_P
+       START STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P
+       SUBSCRIPTION SUBSTRING SYMMETRIC SYSID SYSTEM_P
 
        TABLE TABLES TABLESAMPLE TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN
        TIME TIMESTAMP TO TRAILING TRANSACTION TRANSFORM TREAT TRIGGER TRIM TRUE_P
@@ -822,8 +828,10 @@ stmt :
                        | AlterTableStmt
                        | AlterTblSpcStmt
                        | AlterCompositeTypeStmt
+                       | AlterPublicationStmt
                        | AlterRoleSetStmt
                        | AlterRoleStmt
+                       | AlterSubscriptionStmt
                        | AlterTSConfigurationStmt
                        | AlterTSDictionaryStmt
                        | AlterUserMappingStmt
@@ -851,12 +859,14 @@ stmt :
                        | CreateMatViewStmt
                        | CreateOpClassStmt
                        | CreateOpFamilyStmt
+                       | CreatePublicationStmt
                        | AlterOpFamilyStmt
                        | CreatePolicyStmt
                        | CreatePLangStmt
                        | CreateSchemaStmt
                        | CreateSeqStmt
                        | CreateStmt
+                       | CreateSubscriptionStmt
                        | CreateTableSpaceStmt
                        | CreateTransformStmt
                        | CreateTrigStmt
@@ -883,6 +893,7 @@ stmt :
                        | DropPLangStmt
                        | DropRuleStmt
                        | DropStmt
+                       | DropSubscriptionStmt
                        | DropTableSpaceStmt
                        | DropTransformStmt
                        | DropTrigStmt
@@ -5613,16 +5624,21 @@ def_list:       def_elem                                                                { $$ = list_make1($1); }
                        | def_list ',' def_elem                                 { $$ = lappend($1, $3); }
                ;
 
-def_elem:      ColLabel '=' def_arg
+def_elem:      def_key '=' def_arg
                                {
                                        $$ = makeDefElem($1, (Node *) $3, @1);
                                }
-                       | ColLabel
+                       | def_key
                                {
                                        $$ = makeDefElem($1, NULL, @1);
                                }
                ;
 
+/*
+ * Key of a def_elem: either a single label, or two labels which are
+ * combined into one key string separated by a single space.
+ */
+def_key:
+                       ColLabel                                                { $$ = $1; }
+                       | ColLabel ColLabel                             { $$ = psprintf("%s %s", $1, $2); }
+               ;
+
 /* Note: any simple identifier will be returned as a type name! */
 def_arg:       func_type                                               { $$ = (Node *)$1; }
                        | reserved_keyword                              { $$ = (Node *)makeString(pstrdup($1)); }
@@ -6073,6 +6089,7 @@ drop_type:        TABLE                                                                   { $$ = OBJECT_TABLE; }
                        | TEXT_P SEARCH DICTIONARY                              { $$ = OBJECT_TSDICTIONARY; }
                        | TEXT_P SEARCH TEMPLATE                                { $$ = OBJECT_TSTEMPLATE; }
                        | TEXT_P SEARCH CONFIGURATION                   { $$ = OBJECT_TSCONFIGURATION; }
+                       | PUBLICATION                                                   { $$ = OBJECT_PUBLICATION; }
                ;
 
 any_name_list:
@@ -8933,8 +8950,236 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec
                                        n->newowner = $7;
                                        $$ = (Node *)n;
                                }
+                       | ALTER PUBLICATION name OWNER TO RoleSpec
+                               {
+                                       AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
+                                       n->objectType = OBJECT_PUBLICATION;
+                                       n->object = list_make1(makeString($3));
+                                       n->newowner = $6;
+                                       $$ = (Node *)n;
+                               }
+                       | ALTER SUBSCRIPTION name OWNER TO RoleSpec
+                               {
+                                       AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
+                                       n->objectType = OBJECT_SUBSCRIPTION;
+                                       n->object = list_make1(makeString($3));
+                                       n->newowner = $6;
+                                       $$ = (Node *)n;
+                               }
+               ;
+
+
+/*****************************************************************************
+ *
+ * CREATE PUBLICATION name [ FOR TABLE table [, ...] | FOR ALL TABLES ]
+ *             [ WITH options ]
+ *
+ *****************************************************************************/
+
+CreatePublicationStmt:
+                       CREATE PUBLICATION name opt_publication_for_tables opt_definition
+                               {
+                                       CreatePublicationStmt *n = makeNode(CreatePublicationStmt);
+                                       n->pubname = $3;
+                                       n->options = $5;
+                                       /*
+                                        * $4 is NULL when no FOR clause was given; otherwise it
+                                        * is either the table List or a non-List marker node.
+                                        */
+                                       if ($4 != NULL)
+                                       {
+                                               /* FOR TABLE */
+                                               if (IsA($4, List))
+                                                       n->tables = (List *)$4;
+                                               /* FOR ALL TABLES */
+                                               else
+                                                       n->for_all_tables = TRUE;
+                                       }
+                                       $$ = (Node *)n;
+                               }
                ;
 
+/* Optional FOR clause; yields NULL when the clause is absent. */
+opt_publication_for_tables:
+                       publication_for_tables                                  { $$ = $1; }
+                       | /* EMPTY */                                                   { $$ = NULL; }
+               ;
+
+/*
+ * Yields the relation list for FOR TABLE, or an Integer node for
+ * FOR ALL TABLES; CreatePublicationStmt tells the two apart with
+ * IsA(..., List).
+ */
+publication_for_tables:
+                       FOR TABLE relation_expr_list
+                               {
+                                       $$ = (Node *) $3;
+                               }
+                       | FOR ALL TABLES
+                               {
+                                       $$ = (Node *) makeInteger(TRUE);
+                               }
+               ;
+
+/*****************************************************************************
+ *
+ * ALTER PUBLICATION name WITH options
+ *
+ * ALTER PUBLICATION name ADD TABLE table [, table2]
+ *
+ * ALTER PUBLICATION name DROP TABLE table [, table2]
+ *
+ * ALTER PUBLICATION name SET TABLE table [, table2]
+ *
+ *****************************************************************************/
+
+/*
+ * The WITH form carries only an options list; the ADD/SET/DROP TABLE
+ * forms carry a relation list plus the matching DEFELEM_* action.
+ */
+AlterPublicationStmt:
+                       ALTER PUBLICATION name WITH definition
+                               {
+                                       AlterPublicationStmt *n = makeNode(AlterPublicationStmt);
+                                       n->pubname = $3;
+                                       n->options = $5;
+                                       $$ = (Node *)n;
+                               }
+                       | ALTER PUBLICATION name ADD_P TABLE relation_expr_list
+                               {
+                                       /* ADD TABLE: tableAction is DEFELEM_ADD */
+                                       AlterPublicationStmt *n = makeNode(AlterPublicationStmt);
+                                       n->pubname = $3;
+                                       n->tables = $6;
+                                       n->tableAction = DEFELEM_ADD;
+                                       $$ = (Node *)n;
+                               }
+                       | ALTER PUBLICATION name SET TABLE relation_expr_list
+                               {
+                                       /* SET TABLE: tableAction is DEFELEM_SET */
+                                       AlterPublicationStmt *n = makeNode(AlterPublicationStmt);
+                                       n->pubname = $3;
+                                       n->tables = $6;
+                                       n->tableAction = DEFELEM_SET;
+                                       $$ = (Node *)n;
+                               }
+                       | ALTER PUBLICATION name DROP TABLE relation_expr_list
+                               {
+                                       /* DROP TABLE: tableAction is DEFELEM_DROP */
+                                       AlterPublicationStmt *n = makeNode(AlterPublicationStmt);
+                                       n->pubname = $3;
+                                       n->tables = $6;
+                                       n->tableAction = DEFELEM_DROP;
+                                       $$ = (Node *)n;
+                               }
+               ;
+
+/*****************************************************************************
+ *
+ * CREATE SUBSCRIPTION name ...
+ *
+ *****************************************************************************/
+
+/*
+ * CREATE SUBSCRIPTION name CONNECTION 'conninfo'
+ *             PUBLICATION publication [, ...] followed by opt_definition
+ */
+CreateSubscriptionStmt:
+                       CREATE SUBSCRIPTION name CONNECTION Sconst PUBLICATION publication_name_list opt_definition
+                               {
+                                       CreateSubscriptionStmt *n =
+                                               makeNode(CreateSubscriptionStmt);
+                                       n->subname = $3;
+                                       n->conninfo = $5;
+                                       n->publication = $7;
+                                       n->options = $8;
+                                       $$ = (Node *)n;
+                               }
+               ;
+
+/* Comma-separated list of publication names, built as a List of String nodes. */
+publication_name_list:
+                       publication_name_item
+                               {
+                                       $$ = list_make1($1);
+                               }
+                       | publication_name_list ',' publication_name_item
+                               {
+                                       $$ = lappend($1, $3);
+                               }
+               ;
+
+/* A publication name is accepted as a ColLabel and wrapped in a String node. */
+publication_name_item:
+                       ColLabel                        { $$ = makeString($1); };
+
+/*****************************************************************************
+ *
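+ * ALTER SUBSCRIPTION name WITH options
+ *
+ * ALTER SUBSCRIPTION name CONNECTION 'conninfo'
+ *
+ * ALTER SUBSCRIPTION name SET PUBLICATION publication [, publication2]
+ *
+ * ALTER SUBSCRIPTION name ENABLE
+ *
+ * ALTER SUBSCRIPTION name DISABLE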
+ *
+ *****************************************************************************/
+
+/*
+ * Every form other than WITH is turned into a one-element options list
+ * whose key is "conninfo", "publication" or "enabled".
+ */
+AlterSubscriptionStmt:
+                       ALTER SUBSCRIPTION name WITH definition
+                               {
+                                       AlterSubscriptionStmt *n =
+                                               makeNode(AlterSubscriptionStmt);
+                                       n->subname = $3;
+                                       n->options = $5;
+                                       $$ = (Node *)n;
+                               }
+                       | ALTER SUBSCRIPTION name CONNECTION Sconst
+                               {
+                                       /* connection string becomes the "conninfo" option */
+                                       AlterSubscriptionStmt *n =
+                                               makeNode(AlterSubscriptionStmt);
+                                       n->subname = $3;
+                                       n->options = list_make1(makeDefElem("conninfo",
+                                                                                       (Node *)makeString($5), @1));
+                                       $$ = (Node *)n;
+                               }
+                       | ALTER SUBSCRIPTION name SET PUBLICATION publication_name_list
+                               {
+                                       /* the name list becomes the "publication" option */
+                                       AlterSubscriptionStmt *n =
+                                               makeNode(AlterSubscriptionStmt);
+                                       n->subname = $3;
+                                       n->options = list_make1(makeDefElem("publication",
+                                                                                       (Node *)$6, @1));
+                                       $$ = (Node *)n;
+                               }
+                       | ALTER SUBSCRIPTION name ENABLE_P
+                               {
+                                       /* "enabled" option set to TRUE */
+                                       AlterSubscriptionStmt *n =
+                                               makeNode(AlterSubscriptionStmt);
+                                       n->subname = $3;
+                                       n->options = list_make1(makeDefElem("enabled",
+                                                                                       (Node *)makeInteger(TRUE), @1));
+                                       $$ = (Node *)n;
+                               }
+                       | ALTER SUBSCRIPTION name DISABLE_P
+                               {
+                                       /* "enabled" option set to FALSE */
+                                       AlterSubscriptionStmt *n =
+                                               makeNode(AlterSubscriptionStmt);
+                                       n->subname = $3;
+                                       n->options = list_make1(makeDefElem("enabled",
+                                                                                       (Node *)makeInteger(FALSE), @1));
+                                       $$ = (Node *)n;
+                               }               ;
+
+/*****************************************************************************
+ *
+ * DROP SUBSCRIPTION [ IF EXISTS ] name [ DROP SLOT | NODROP SLOT ]
+ *
+ *****************************************************************************/
+
+DropSubscriptionStmt: DROP SUBSCRIPTION name opt_drop_slot
+                               {
+                                       DropSubscriptionStmt *n = makeNode(DropSubscriptionStmt);
+                                       n->subname = $3;
+                                       n->drop_slot = $4;
+                                       n->missing_ok = false;
+                                       $$ = (Node *) n;
+                               }
+                               |  DROP SUBSCRIPTION IF_P EXISTS name opt_drop_slot
+                               {
+                                       DropSubscriptionStmt *n = makeNode(DropSubscriptionStmt);
+                                       n->subname = $5;
+                                       n->drop_slot = $6;
+                                       n->missing_ok = true;
+                                       $$ = (Node *) n;
+                               }
+               ;
+
+/*
+ * Optional trailing slot clause; the slot is dropped when it is omitted.
+ *
+ * DROP is a keyword token, so the lexer never delivers it as IDENT and it
+ * has to be matched explicitly.  NODROP is not a keyword and therefore
+ * arrives as IDENT.
+ */
+opt_drop_slot:
+                       DROP SLOT
+                               {
+                                       $$ = TRUE;
+                               }
+                       | IDENT SLOT
+                               {
+                                       if (strcmp($1, "nodrop") == 0)
+                                               $$ = FALSE;
+                                       else
+                                               ereport(ERROR,
+                                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                                errmsg("unrecognized option \"%s\"", $1),
+                                                                               parser_errposition(@1)));
+                               }
+                       | /*EMPTY*/                                                             { $$ = TRUE; }
+               ;
 
 /*****************************************************************************
  *
@@ -14201,6 +14446,7 @@ unreserved_keyword:
                        | PROCEDURAL
                        | PROCEDURE
                        | PROGRAM
+                       | PUBLICATION
                        | QUOTE
                        | RANGE
                        | READ
@@ -14244,6 +14490,7 @@ unreserved_keyword:
                        | SHOW
                        | SIMPLE
                        | SKIP
+                       | SLOT
                        | SNAPSHOT
                        | SQL_P
                        | STABLE
@@ -14256,6 +14503,7 @@ unreserved_keyword:
                        | STORAGE
                        | STRICT_P
                        | STRIP_P
+                       | SUBSCRIPTION
                        | SYSID
                        | SYSTEM_P
                        | TABLES
index 61d3170b839e21e4a53ec6bf0954639af191ff85..cd99b0b3927f028b6f48b60b93e96b1e09c5b66c 100644 (file)
@@ -20,6 +20,7 @@
 #include "port/atomics.h"
 #include "postmaster/bgworker_internals.h"
 #include "postmaster/postmaster.h"
+#include "replication/logicallauncher.h"
 #include "storage/dsm.h"
 #include "storage/ipc.h"
 #include "storage/latch.h"
@@ -107,6 +108,15 @@ struct BackgroundWorkerHandle
 
 static BackgroundWorkerArray *BackgroundWorkerData;
 
+/*
+ * List of workers that are allowed to be started outside of
+ * shared_preload_libraries.
+ *
+ * The array is NULL-terminated; RegisterBackgroundWorker() scans it and
+ * compares each entry against the worker's bgw_main.
+ */
+static const bgworker_main_type InternalBGWorkers[] = {
+       ApplyLauncherMain,
+       NULL
+};
+
 /*
  * Calculate shared memory needed.
  */
@@ -761,12 +771,23 @@ RegisterBackgroundWorker(BackgroundWorker *worker)
 {
        RegisteredBgWorker *rw;
        static int      numworkers = 0;
+       bool            internal = false;
+       int                     i;
 
        if (!IsUnderPostmaster)
                ereport(DEBUG1,
                 (errmsg("registering background worker \"%s\"", worker->bgw_name)));
 
-       if (!process_shared_preload_libraries_in_progress)
+       for (i = 0; InternalBGWorkers[i]; i++)
+       {
+               if (worker->bgw_main == InternalBGWorkers[i])
+               {
+                       internal = true;
+                       break;
+               }
+       }
+
+       if (!process_shared_preload_libraries_in_progress && !internal)
        {
                if (!IsUnderPostmaster)
                        ereport(LOG,
index f37a0bfaafd5d764f30d5ef676862d82f7fda9a7..7176cf1bbeb52de251c179650b34c02ea66bc673 100644 (file)
@@ -3303,6 +3303,12 @@ pgstat_get_wait_activity(WaitEventActivity w)
                case WAIT_EVENT_WAL_WRITER_MAIN:
                        event_name = "WalWriterMain";
                        break;
+               case WAIT_EVENT_LOGICAL_LAUNCHER_MAIN:
+                       event_name = "LogicalLauncherMain";
+                       break;
+               case WAIT_EVENT_LOGICAL_APPLY_MAIN:
+                       event_name = "LogicalApplyMain";
+                       break;
                /* no default case, so that compiler will warn */
        }
 
index 5be30b0ee1562970e6d0a04cc08eb155cec5248a..8d99c7a0d4eeb4e7638036443c175bce6f593470 100644 (file)
 #include "postmaster/pgarch.h"
 #include "postmaster/postmaster.h"
 #include "postmaster/syslogger.h"
+#include "replication/logicallauncher.h"
 #include "replication/walsender.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
@@ -941,6 +942,14 @@ PostmasterMain(int argc, char *argv[])
        }
 #endif
 
+       /*
+        * Register the apply launcher.  Since it registers a background worker,
+        * it needs to be called before InitializeMaxBackends(), and it's probably
+        * a good idea to call it before any modules had chance to take the
+        * background worker slots.
+        */
+       ApplyLauncherRegister();
+
        /*
         * process any libraries that should be preloaded at postmaster start
         */
index 7671b166ed3bbffaf7b7d8a59d27dae68cb79c80..7df3698afb24036bcc6d224f8871686e25583687 100644 (file)
 #include "access/xlog.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "replication/logicalproto.h"
 #include "replication/walreceiver.h"
 #include "storage/proc.h"
 #include "utils/builtins.h"
+#include "utils/pg_lsn.h"
 
 PG_MODULE_MAGIC;
 
@@ -44,26 +46,35 @@ struct WalReceiverConn
 
 /* Prototypes for interface functions */
 static WalReceiverConn *libpqrcv_connect(const char *conninfo,
-                                                                                bool logical, const char *appname);
+                                                                                bool logical, const char *appname,
+                                                                                char **err);
+static void libpqrcv_check_conninfo(const char *conninfo);
 static char *libpqrcv_get_conninfo(WalReceiverConn *conn);
 static char *libpqrcv_identify_system(WalReceiverConn *conn,
-                                                                         TimeLineID *primary_tli);
+                                                                         TimeLineID *primary_tli,
+                                                                         int *server_version);
 static void libpqrcv_readtimelinehistoryfile(WalReceiverConn *conn,
                                                                 TimeLineID tli, char **filename,
                                                                 char **content, int *len);
 static bool libpqrcv_startstreaming(WalReceiverConn *conn,
-                                               TimeLineID tli, XLogRecPtr startpoint,
-                                               const char *slotname);
+                                                                       const WalRcvStreamOptions *options);
 static void libpqrcv_endstreaming(WalReceiverConn *conn,
                                                                  TimeLineID *next_tli);
 static int     libpqrcv_receive(WalReceiverConn *conn, char **buffer,
                                                         pgsocket *wait_fd);
 static void libpqrcv_send(WalReceiverConn *conn, const char *buffer,
                                                  int nbytes);
+static char *libpqrcv_create_slot(WalReceiverConn *conn,
+                                                                 const char *slotname,
+                                                                 bool temporary,
+                                                                 XLogRecPtr *lsn);
+static bool libpqrcv_command(WalReceiverConn *conn,
+                                                        const char *cmd, char **err);
 static void libpqrcv_disconnect(WalReceiverConn *conn);
 
 static WalReceiverFunctionsType PQWalReceiverFunctions = {
        libpqrcv_connect,
+       libpqrcv_check_conninfo,
        libpqrcv_get_conninfo,
        libpqrcv_identify_system,
        libpqrcv_readtimelinehistoryfile,
@@ -71,11 +82,14 @@ static WalReceiverFunctionsType PQWalReceiverFunctions = {
        libpqrcv_endstreaming,
        libpqrcv_receive,
        libpqrcv_send,
+       libpqrcv_create_slot,
+       libpqrcv_command,
        libpqrcv_disconnect
 };
 
 /* Prototypes for private functions */
 static PGresult *libpqrcv_PQexec(PGconn *streamConn, const char *query);
+static char *stringlist_to_identifierstr(PGconn *conn, List *strings);
 
 /*
  * Module initialization function
@@ -90,9 +104,12 @@ _PG_init(void)
 
 /*
  * Establish the connection to the primary server for XLOG streaming
+ *
+ * Returns NULL on error and fills the err with palloc'ed error message.
  */
 static WalReceiverConn *
-libpqrcv_connect(const char *conninfo, bool logical, const char *appname)
+libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
+                                char **err)
 {
        WalReceiverConn *conn;
        const char *keys[5];
@@ -123,14 +140,34 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname)
        conn = palloc0(sizeof(WalReceiverConn));
        conn->streamConn = PQconnectdbParams(keys, vals, /* expand_dbname = */ true);
        if (PQstatus(conn->streamConn) != CONNECTION_OK)
-               ereport(ERROR,
-                               (errmsg("could not connect to the primary server: %s",
-                                               PQerrorMessage(conn->streamConn))));
+       {
+               *err = pstrdup(PQerrorMessage(conn->streamConn));
+               return NULL;
+       }
+
        conn->logical = logical;
 
        return conn;
 }
 
+/*
+ * Validate connection info string (just try to parse it).
+ *
+ * Raises an ERROR with ERRCODE_SYNTAX_ERROR when libpq cannot parse
+ * conninfo; returns normally otherwise.
+ */
+static void
+libpqrcv_check_conninfo(const char *conninfo)
+{
+       PQconninfoOption   *opts = NULL;
+       char                       *err = NULL;
+
+       opts = PQconninfoParse(conninfo, &err);
+       if (opts == NULL)
+       {
+               /*
+                * libpq hands back a malloc'ed message, or NULL if it ran out of
+                * memory.  Copy it into palloc'ed storage and release the original
+                * before ereport() performs its non-local exit, so nothing leaks.
+                */
+               char       *errcopy = err ? pstrdup(err) : "out of memory";
+
+               if (err != NULL)
+                       PQfreemem(err);
+
+               ereport(ERROR,
+                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                errmsg("invalid connection string syntax: %s", errcopy)));
+       }
+
+       PQconninfoFree(opts);
+}
+
 /*
  * Return a user-displayable conninfo string.  Any security-sensitive fields
  * are obfuscated.
@@ -185,7 +222,8 @@ libpqrcv_get_conninfo(WalReceiverConn *conn)
  * timeline ID of the primary.
  */
 static char *
-libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli)
+libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli,
+                                                int *server_version)
 {
        PGresult   *res;
        char       *primary_sysid;
@@ -218,11 +256,13 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli)
        *primary_tli = pg_atoi(PQgetvalue(res, 0, 1), 4, 0);
        PQclear(res);
 
+       *server_version = PQserverVersion(conn->streamConn);
+
        return primary_sysid;
 }
 
 /*
- * Start streaming WAL data from given startpoint and timeline.
+ * Start streaming WAL data from given streaming options.
  *
  * Returns true if we switched successfully to copy-both mode. False
  * means the server received the command and executed it successfully, but
@@ -233,27 +273,54 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli)
  */
 static bool
 libpqrcv_startstreaming(WalReceiverConn *conn,
-                                               TimeLineID tli, XLogRecPtr startpoint,
-                                               const char *slotname)
+                                               const WalRcvStreamOptions *options)
 {
        StringInfoData cmd;
        PGresult   *res;
 
-       Assert(!conn->logical);
+       Assert(options->logical == conn->logical);
+       Assert(options->slotname || !options->logical);
 
        initStringInfo(&cmd);
 
-       /* Start streaming from the point requested by startup process */
-       if (slotname != NULL)
-               appendStringInfo(&cmd,
-                                                "START_REPLICATION SLOT \"%s\" %X/%X TIMELINE %u",
-                                                slotname,
-                                                (uint32) (startpoint >> 32), (uint32) startpoint,
-                                                tli);
+       /* Build the command. */
+       appendStringInfoString(&cmd, "START_REPLICATION");
+       if (options->slotname != NULL)
+               appendStringInfo(&cmd, " SLOT \"%s\"",
+                                                options->slotname);
+
+       if (options->logical)
+               appendStringInfo(&cmd, " LOGICAL");
+
+       appendStringInfo(&cmd, " %X/%X",
+                                        (uint32) (options->startpoint >> 32),
+                                        (uint32) options->startpoint);
+
+       /*
+        * Additional options are different depending on if we are doing logical
+        * or physical replication.
+        */
+       if (options->logical)
+       {
+               char   *pubnames_str;
+               List   *pubnames;
+
+               appendStringInfoString(&cmd, " (");
+               appendStringInfo(&cmd, "proto_version '%u'",
+                                                options->proto.logical.proto_version);
+               pubnames = options->proto.logical.publication_names;
+               pubnames_str = stringlist_to_identifierstr(conn->streamConn, pubnames);
+               appendStringInfo(&cmd, ", publication_names %s",
+                                                PQescapeLiteral(conn->streamConn, pubnames_str,
+                                                                                strlen(pubnames_str)));
+               appendStringInfoChar(&cmd, ')');
+               pfree(pubnames_str);
+       }
        else
-               appendStringInfo(&cmd, "START_REPLICATION %X/%X TIMELINE %u",
-                                                (uint32) (startpoint >> 32), (uint32) startpoint,
-                                                tli);
+               appendStringInfo(&cmd, " TIMELINE %u",
+                                                options->proto.physical.startpointTLI);
+
+       /* Start streaming. */
        res = libpqrcv_PQexec(conn->streamConn, cmd.data);
        pfree(cmd.data);
 
@@ -577,3 +644,107 @@ libpqrcv_send(WalReceiverConn *conn, const char *buffer, int nbytes)
                                (errmsg("could not send data to WAL stream: %s",
                                                PQerrorMessage(conn->streamConn))));
 }
+
+/*
+ * Create new replication slot.
+ *
+ * The slot is LOGICAL (using the pgoutput plugin) when the connection is
+ * logical and PHYSICAL otherwise; TEMPORARY is added on request.
+ *
+ * Stores the LSN reported by the server in *lsn.  Returns a palloc'ed copy
+ * of the exported snapshot name, or NULL if the server reported none (as is
+ * the case for a physical slot).  Raises an ERROR if the command fails.
+ */
+static char *
+libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname,
+                                        bool temporary, XLogRecPtr *lsn)
+{
+       PGresult           *res;
+       StringInfoData  cmd;
+       char               *snapshot;
+
+       initStringInfo(&cmd);
+
+       appendStringInfo(&cmd, "CREATE_REPLICATION_SLOT \"%s\" ", slotname);
+
+       if (temporary)
+               appendStringInfoString(&cmd, "TEMPORARY ");
+
+       /*
+        * The replication protocol requires the slot kind to be spelled out;
+        * without it the command is a syntax error on the server.
+        */
+       if (conn->logical)
+               appendStringInfoString(&cmd, "LOGICAL pgoutput");
+       else
+               appendStringInfoString(&cmd, "PHYSICAL");
+
+       res = libpqrcv_PQexec(conn->streamConn, cmd.data);
+       pfree(cmd.data);
+
+       if (PQresultStatus(res) != PGRES_TUPLES_OK)
+       {
+               PQclear(res);
+               ereport(ERROR,
+                               (errmsg("could not create replication slot \"%s\": %s",
+                                               slotname, PQerrorMessage(conn->streamConn))));
+       }
+
+       /* Column 1 of the single result row holds the LSN, column 2 the snapshot. */
+       *lsn = DatumGetLSN(DirectFunctionCall1Coll(pg_lsn_in, InvalidOid,
+                                         CStringGetDatum(PQgetvalue(res, 0, 1))));
+       if (!PQgetisnull(res, 0, 2))
+               snapshot = pstrdup(PQgetvalue(res, 0, 2));
+       else
+               snapshot = NULL;
+
+       PQclear(res);
+
+       return snapshot;
+}
+
+/*
+ * Execute a command on the remote server.
+ *
+ * Returns true when the result status is PGRES_COMMAND_OK.  Otherwise
+ * returns false and sets *err to a palloc'ed copy of the connection's
+ * error message.
+ */
+static bool
+libpqrcv_command(WalReceiverConn *conn, const char *cmd, char **err)
+{
+       PGresult   *result = libpqrcv_PQexec(conn->streamConn, cmd);
+       bool            succeeded = (PQresultStatus(result) == PGRES_COMMAND_OK);
+
+       /* Only the status is needed from the result, so release it right away. */
+       PQclear(result);
+
+       if (!succeeded)
+               *err = pstrdup(PQerrorMessage(conn->streamConn));
+
+       return succeeded;
+}
+
+/*
+ * Given a List of strings, return it as single comma separated
+ * string, quoting identifiers as needed.
+ *
+ * This is essentially the reverse of SplitIdentifierString.
+ *
+ * Each element is quoted with PQescapeIdentifier() on the given
+ * connection; an ERROR is raised if libpq cannot quote an element.
+ *
+ * The caller should free the result.
+ */
+static char *
+stringlist_to_identifierstr(PGconn *conn, List *strings)
+{
+       ListCell *lc;
+       StringInfoData res;
+       bool first = true;
+
+       initStringInfo(&res);
+
+       foreach (lc, strings)
+       {
+               char *val = strVal(lfirst(lc));
+               char *val_escaped;
+
+               if (first)
+                       first = false;
+               else
+                       appendStringInfoChar(&res, ',');
+
+               /* libpq returns NULL on failure; never pass that on as a string */
+               val_escaped = PQescapeIdentifier(conn, val, strlen(val));
+               if (val_escaped == NULL)
+                       ereport(ERROR,
+                                       (errmsg("could not quote identifier \"%s\": %s",
+                                                       val, PQerrorMessage(conn))));
+
+               appendStringInfoString(&res, val_escaped);
+
+               /* the quoted copy is malloc'ed by libpq and must go back to it */
+               PQfreemem(val_escaped);
+       }
+
+       return res.data;
+}
index 1d7ca062d11c63b1739a63b157d4f96e37aabde9..259befa4e6c7ed2de775b1e7781e1d47edcd7b37 100644 (file)
@@ -14,7 +14,7 @@ include $(top_builddir)/src/Makefile.global
 
 override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
 
-OBJS = decode.o logical.o logicalfuncs.o message.o origin.o reorderbuffer.o \
-       snapbuild.o
+OBJS = decode.o launcher.o logical.o logicalfuncs.o message.o origin.o \
+          proto.o relation.o reorderbuffer.o snapbuild.o worker.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
new file mode 100644 (file)
index 0000000..b5240dc
--- /dev/null
@@ -0,0 +1,759 @@
+/*-------------------------------------------------------------------------
+ * launcher.c
+ *        PostgreSQL logical replication worker launcher process
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       src/backend/replication/logical/launcher.c
+ *
+ * NOTES
+ *       This module contains the logical replication worker launcher which
+ *       uses the background worker infrastructure to start the logical
+ *       replication workers for every enabled subscription.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+
+#include "access/heapam.h"
+#include "access/htup.h"
+#include "access/htup_details.h"
+#include "access/xact.h"
+
+#include "catalog/pg_subscription.h"
+
+#include "libpq/pqsignal.h"
+
+#include "postmaster/bgworker.h"
+#include "postmaster/fork_process.h"
+#include "postmaster/postmaster.h"
+
+#include "replication/logicallauncher.h"
+#include "replication/logicalworker.h"
+#include "replication/slot.h"
+#include "replication/worker_internal.h"
+
+#include "storage/ipc.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/procsignal.h"
+
+#include "tcop/tcopprot.h"
+
+#include "utils/memutils.h"
+#include "utils/pg_lsn.h"
+#include "utils/ps_status.h"
+#include "utils/timeout.h"
+#include "utils/snapmgr.h"
+
+/* max sleep time between cycles (3min) */
+#define DEFAULT_NAPTIME_PER_CYCLE 180000L
+
+int    max_logical_replication_workers = 4;
+LogicalRepWorker *MyLogicalRepWorker = NULL;
+
+/*
+ * Shared-memory state of the logical replication launcher.
+ *
+ * The struct is allocated by ApplyLauncherShmemInit() with room for
+ * max_logical_replication_workers entries in the trailing workers array
+ * (see ApplyLauncherShmemSize()).
+ */
+typedef struct LogicalRepCtxStruct
+{
+       /* Supervisor process: pid of the launcher, 0 once it has shut down. */
+       pid_t           launcher_pid;
+
+       /* Background workers: one slot per possible worker. */
+       LogicalRepWorker        workers[FLEXIBLE_ARRAY_MEMBER];
+} LogicalRepCtxStruct;
+
+LogicalRepCtxStruct *LogicalRepCtx;
+
+static void logicalrep_worker_onexit(int code, Datum arg);
+static void logicalrep_worker_detach(void);
+
+bool           got_SIGTERM = false;
+static bool    on_commit_laucher_wakeup = false;
+
+Datum pg_stat_get_subscription(PG_FUNCTION_ARGS);
+
+
+/*
+ * Build a list of all subscriptions found in pg_subscription.
+ *
+ * Only the fields needed for starting and stopping workers (oid, dbid,
+ * owner, enabled, name) are filled in; conninfo, slotname and publications
+ * are left NULL/NIL.  The list and its entries are allocated in the memory
+ * context that is current when the function is entered.
+ */
+static List *
+get_subscription_list(void)
+{
+       /* Remember the caller's context; all output data is allocated there. */
+       MemoryContext callercxt = CurrentMemoryContext;
+       List       *sublist = NIL;
+       Relation        subrel;
+       HeapScanDesc subscan;
+
+       /*
+        * Start a transaction so we can access pg_subscription, and get a
+        * snapshot.  We don't have a use for the snapshot itself, but we're
+        * interested in the secondary effect that it sets RecentGlobalXmin.
+        * (This is critical for anything that reads heap pages, because HOT may
+        * decide to prune them even if the process doesn't attempt to modify
+        * any tuples.)
+        */
+       StartTransactionCommand();
+       (void) GetTransactionSnapshot();
+
+       subrel = heap_open(SubscriptionRelationId, AccessShareLock);
+       subscan = heap_beginscan_catalog(subrel, 0, NULL);
+
+       for (;;)
+       {
+               HeapTuple       tuple = heap_getnext(subscan, ForwardScanDirection);
+               Form_pg_subscription form;
+               Subscription *entry;
+               MemoryContext scancxt;
+
+               if (!HeapTupleIsValid(tuple))
+                       break;
+
+               form = (Form_pg_subscription) GETSTRUCT(tuple);
+
+               /*
+                * Switch to the caller's context only while building the entry,
+                * so that leaky things like heap_getnext() keep running in the
+                * transaction's context rather than a potentially long-lived one.
+                */
+               scancxt = MemoryContextSwitchTo(callercxt);
+
+               entry = (Subscription *) palloc(sizeof(Subscription));
+               entry->oid = HeapTupleGetOid(tuple);
+               entry->dbid = form->subdbid;
+               entry->owner = form->subowner;
+               entry->enabled = form->subenabled;
+               entry->name = pstrdup(NameStr(form->subname));
+
+               /* Fields the launcher does not look at stay empty. */
+               entry->conninfo = NULL;
+               entry->slotname = NULL;
+               entry->publications = NIL;
+
+               sublist = lappend(sublist, entry);
+
+               MemoryContextSwitchTo(scancxt);
+       }
+
+       heap_endscan(subscan);
+       heap_close(subrel, AccessShareLock);
+
+       CommitTransactionCommand();
+
+       return sublist;
+}
+
+/*
+ * Wait for a background worker to start up and attach to the shmem context.
+ *
+ * This is like WaitForBackgroundWorkerStartup(), except that we wait for
+ * the worker to attach (worker->proc becomes set), not just to start, and
+ * we simply exit if the postmaster dies.
+ *
+ * Returns true once the worker is started and attached, false if it has
+ * stopped.
+ */
+static bool
+WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
+                                                          BackgroundWorkerHandle *handle)
+{
+       for (;;)
+       {
+               BgwHandleStatus bgw_status;
+               pid_t           bgw_pid;
+               int                     events;
+
+               CHECK_FOR_INTERRUPTS();
+
+               bgw_status = GetBackgroundWorkerPid(handle, &bgw_pid);
+
+               /* Worker didn't start or died before attaching to our shmem. */
+               if (bgw_status == BGWH_STOPPED)
+                       return false;
+
+               /*
+                * Worker started and attached to our shmem.  This check is safe
+                * because only the launcher ever starts the workers, so nobody
+                * can steal the worker slot.
+                */
+               if (bgw_status == BGWH_STARTED && worker->proc)
+                       return true;
+
+               /*
+                * We need a timeout because we generally don't get notified via
+                * latch about the worker attach.
+                */
+               events = WaitLatch(MyLatch,
+                                                  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                                                  1000L, WAIT_EVENT_BGWORKER_STARTUP);
+
+               if (events & WL_POSTMASTER_DEATH)
+                       proc_exit(1);
+
+               ResetLatch(MyLatch);
+       }
+
+       /* Not reached. */
+       return false;
+}
+
+/*
+ * Look up the attached worker for the given subscription id.
+ *
+ * A slot matches only if its subid equals the argument, it has a proc
+ * attached, and that proc's pid belongs to a live backend.  Returns the
+ * first matching slot or NULL.  The caller must hold LogicalRepWorkerLock.
+ */
+LogicalRepWorker *
+logicalrep_worker_find(Oid subid)
+{
+       int                     slotno;
+
+       Assert(LWLockHeldByMe(LogicalRepWorkerLock));
+
+       for (slotno = 0; slotno < max_logical_replication_workers; slotno++)
+       {
+               LogicalRepWorker *candidate = &LogicalRepCtx->workers[slotno];
+
+               if (candidate->subid != subid)
+                       continue;
+               if (!candidate->proc)
+                       continue;
+               if (IsBackendPid(candidate->proc->pid))
+                       return candidate;
+       }
+
+       return NULL;
+}
+
+/*
+ * Start new apply background worker.
+ *
+ * Reserves a free slot in the shared workers array for the subscription,
+ * registers a dynamic background worker running ApplyWorkerMain with the
+ * slot number as its argument, and waits until that worker has attached
+ * to the slot or has stopped.
+ *
+ * Raises an ERROR when max_replication_slots is 0.  When no worker slot is
+ * free or the background worker cannot be registered, a WARNING is emitted
+ * and the function returns without having started anything.
+ */
+void
+logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid)
+{
+       BackgroundWorker        bgw;
+       BackgroundWorkerHandle *bgw_handle;
+       int                                     slot;
+       LogicalRepWorker   *worker = NULL;
+
+       ereport(LOG,
+                       (errmsg("starting logical replication worker for subscription \"%s\"",
+                                       subname)));
+
+       /* Report this after the initial starting message for consistency. */
+       if (max_replication_slots == 0)
+               ereport(ERROR,
+                               (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+                                errmsg("cannot start logical replication workers when max_replication_slots = 0")));
+
+       /*
+        * We need to do the modification of the shared memory under lock so that
+        * we have consistent view.
+        */
+       LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
+
+       /* Find unused worker slot. */
+       for (slot = 0; slot < max_logical_replication_workers; slot++)
+       {
+               if (!LogicalRepCtx->workers[slot].proc)
+               {
+                       worker = &LogicalRepCtx->workers[slot];
+                       break;
+               }
+       }
+
+       /* Bail if not found */
+       if (worker == NULL)
+       {
+               /*
+                * A WARNING does not unwind, so the lock has to be released
+                * explicitly before returning; otherwise we would keep holding it.
+                */
+               LWLockRelease(LogicalRepWorkerLock);
+               ereport(WARNING,
+                               (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+                                errmsg("out of logical replication workers slots"),
+                                errhint("You might need to increase max_logical_replication_workers.")));
+               return;
+       }
+
+       /* Prepare the worker info. */
+       memset(worker, 0, sizeof(LogicalRepWorker));
+       worker->dbid = dbid;
+       worker->userid = userid;
+       worker->subid = subid;
+
+       LWLockRelease(LogicalRepWorkerLock);
+
+       /*
+        * Register the new dynamic worker.  Zero the struct first so that the
+        * fields we do not set explicitly are not left as stack garbage.
+        */
+       memset(&bgw, 0, sizeof(bgw));
+       bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
+               BGWORKER_BACKEND_DATABASE_CONNECTION;
+       bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+       bgw.bgw_main = ApplyWorkerMain;
+       snprintf(bgw.bgw_name, BGW_MAXLEN,
+                        "logical replication worker for subscription %u", subid);
+
+       bgw.bgw_restart_time = BGW_NEVER_RESTART;
+       bgw.bgw_notify_pid = MyProcPid;
+       bgw.bgw_main_arg = slot;
+
+       if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
+       {
+               ereport(WARNING,
+                               (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+                                errmsg("out of background workers slots"),
+                                errhint("You might need to increase max_worker_processes.")));
+               return;
+       }
+
+       /* Now wait until it attaches; the outcome is not reported to callers. */
+       WaitForReplicationWorkerAttach(worker, bgw_handle);
+}
+
+/*
+ * Stop the logical replication worker and wait until it detaches from the
+ * slot.
+ *
+ * Does nothing if no worker is found for the subscription.  Otherwise the
+ * worker is sent SIGTERM and we poll until its slot no longer has a proc
+ * attached.
+ *
+ * The caller must hold LogicalRepLauncherLock to ensure that new workers are
+ * not being started during this function call.
+ */
+void
+logicalrep_worker_stop(Oid subid)
+{
+       LogicalRepWorker *worker;
+
+       Assert(LWLockHeldByMe(LogicalRepLauncherLock));
+
+       LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+       worker = logicalrep_worker_find(subid);
+
+       /* No worker, nothing to do. */
+       if (!worker)
+       {
+               LWLockRelease(LogicalRepWorkerLock);
+               return;
+       }
+
+       /*
+        * If we found worker but it does not have proc set it is starting up,
+        * wait for it to finish and then kill it.
+        */
+       while (worker && !worker->proc)
+       {
+               int     rc;
+
+               LWLockRelease(LogicalRepWorkerLock);
+
+               CHECK_FOR_INTERRUPTS();
+
+               /* Wait for signal. */
+               rc = WaitLatch(&MyProc->procLatch,
+                                          WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                                          1000L, WAIT_EVENT_BGWORKER_STARTUP);
+
+               /* emergency bailout if postmaster has died */
+               if (rc & WL_POSTMASTER_DEATH)
+                       proc_exit(1);
+
+               ResetLatch(&MyProc->procLatch);
+
+               /* Check if the worker has started. */
+               LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+               worker = logicalrep_worker_find(subid);
+               if (!worker || worker->proc)
+                       break;
+       }
+
+       /*
+        * The worker may have gone away while we were waiting for it to start;
+        * in that case there is nothing left to terminate.  The lock is held
+        * here on every path out of the loop above.
+        */
+       if (!worker)
+       {
+               LWLockRelease(LogicalRepWorkerLock);
+               return;
+       }
+
+       /* Now terminate the worker ... */
+       kill(worker->proc->pid, SIGTERM);
+       LWLockRelease(LogicalRepWorkerLock);
+
+       /* ... and wait for it to die. */
+       for (;;)
+       {
+               int     rc;
+
+               /* The worker clears proc in its slot when it detaches on exit. */
+               LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+               if (!worker->proc)
+               {
+                       LWLockRelease(LogicalRepWorkerLock);
+                       break;
+               }
+               LWLockRelease(LogicalRepWorkerLock);
+
+               CHECK_FOR_INTERRUPTS();
+
+               /* Sleep until the latch is set or the timeout expires. */
+               rc = WaitLatch(&MyProc->procLatch,
+                                          WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                                          1000L, WAIT_EVENT_BGWORKER_SHUTDOWN);
+
+               /* emergency bailout if postmaster has died */
+               if (rc & WL_POSTMASTER_DEATH)
+                       proc_exit(1);
+
+               ResetLatch(&MyProc->procLatch);
+       }
+}
+
+/*
+ * Attach the current process to the given worker slot.
+ *
+ * Points MyLogicalRepWorker at the slot, records MyProc in it and registers
+ * the exit callback that detaches again.  Raises an ERROR if the slot
+ * already has a proc attached.
+ */
+void
+logicalrep_worker_attach(int slot)
+{
+       LogicalRepWorker *entry;
+
+       /* Block concurrent access. */
+       LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
+
+       Assert(0 <= slot && slot < max_logical_replication_workers);
+
+       entry = &LogicalRepCtx->workers[slot];
+       MyLogicalRepWorker = entry;
+
+       if (entry->proc)
+               ereport(ERROR,
+                          (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                               errmsg("logical replication worker slot %d already used by "
+                                          "another worker", slot)));
+
+       entry->proc = MyProc;
+       before_shmem_exit(logicalrep_worker_onexit, (Datum) 0);
+
+       LWLockRelease(LogicalRepWorkerLock);
+}
+
+/*
+ * Detach the current process from its worker slot.
+ *
+ * Resets the identifying fields of the slot and clears proc, which makes
+ * the slot available to logicalrep_worker_launch() again.
+ */
+static void
+logicalrep_worker_detach(void)
+{
+       LogicalRepWorker *self;
+
+       /* Block concurrent access. */
+       LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
+
+       self = MyLogicalRepWorker;
+       self->dbid = InvalidOid;
+       self->userid = InvalidOid;
+       self->subid = InvalidOid;
+       self->proc = NULL;
+
+       LWLockRelease(LogicalRepWorkerLock);
+}
+
+/*
+ * Cleanup function.
+ *
+ * Called on logical replication worker exit; registered through
+ * before_shmem_exit() by logicalrep_worker_attach().  Both arguments are
+ * ignored, the callback just detaches the worker from its slot.
+ */
+static void
+logicalrep_worker_onexit(int code, Datum arg)
+{
+       logicalrep_worker_detach();
+}
+
+/*
+ * SIGTERM: set flag to exit at next convenient time.
+ *
+ * Only records the request in got_SIGTERM and sets the process latch so
+ * that a loop sleeping in WaitLatch() wakes up and notices the flag.
+ */
+void
+logicalrep_worker_sigterm(SIGNAL_ARGS)
+{
+       /* Preserve errno for whatever code this handler interrupted. */
+       int                     save_errno = errno;
+
+       got_SIGTERM = true;
+
+       /* Waken anything waiting on the process latch */
+       SetLatch(MyLatch);
+
+       errno = save_errno;
+}
+
+/*
+ * ApplyLauncherShmemSize
+ *             Compute space needed for replication launcher shared memory
+ *
+ * The result is the MAXALIGNed size of the fixed struct plus room for
+ * max_logical_replication_workers LogicalRepWorker entries.
+ */
+Size
+ApplyLauncherShmemSize(void)
+{
+       Size            header_size = MAXALIGN(sizeof(LogicalRepCtxStruct));
+       Size            workers_size = mul_size(max_logical_replication_workers,
+                                                                               sizeof(LogicalRepWorker));
+
+       return add_size(header_size, workers_size);
+}
+
+/*
+ * ApplyLauncherRegister
+ *             Register a background worker running the logical replication
+ *             launcher.
+ *
+ * Nothing is registered when max_logical_replication_workers is 0.
+ */
+void
+ApplyLauncherRegister(void)
+{
+       BackgroundWorker bgw;
+
+       if (max_logical_replication_workers == 0)
+               return;
+
+       /* Zero the struct so fields not set below are not stack garbage. */
+       memset(&bgw, 0, sizeof(bgw));
+       bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
+               BGWORKER_BACKEND_DATABASE_CONNECTION;
+       bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+       bgw.bgw_main = ApplyLauncherMain;
+       snprintf(bgw.bgw_name, BGW_MAXLEN,
+                        "logical replication launcher");
+       bgw.bgw_restart_time = 5;
+       bgw.bgw_notify_pid = 0;
+       bgw.bgw_main_arg = (Datum) 0;
+
+       RegisterBackgroundWorker(&bgw);
+}
+
+/*
+ * ApplyLauncherShmemInit
+ *             Allocate and initialize replication launcher shared memory
+ *
+ * The area is zeroed only when ShmemInitStruct() reports that it did not
+ * exist yet.
+ */
+void
+ApplyLauncherShmemInit(void)
+{
+       Size            ctx_size = ApplyLauncherShmemSize();
+       bool            already_exists;
+
+       LogicalRepCtx = (LogicalRepCtxStruct *)
+               ShmemInitStruct("Logical Replication Launcher Data",
+                                               ctx_size,
+                                               &already_exists);
+
+       if (already_exists)
+               return;
+
+       memset(LogicalRepCtx, 0, ctx_size);
+}
+
+/*
+ * Wakeup the launcher on commit if requested.
+ *
+ * The request is consumed here: the flag set by
+ * ApplyLauncherWakeupAtCommit() is cleared again, so later commits in the
+ * same backend do not signal the launcher unless a new request is made.
+ */
+void
+AtCommit_ApplyLauncher(void)
+{
+       if (on_commit_laucher_wakeup)
+       {
+               on_commit_laucher_wakeup = false;
+               ApplyLauncherWakeup();
+       }
+}
+
+/*
+ * Request wakeup of the launcher on commit of the transaction.
+ *
+ * This only records the request; AtCommit_ApplyLauncher() sends the actual
+ * signal so that the launcher stops sleeping and processes the
+ * subscriptions once the current transaction commits.  Should be used when
+ * a new tuple was added to the pg_subscription catalog.
+ */
+void
+ApplyLauncherWakeupAtCommit(void)
+{
+       /* Setting the flag again when it is already set is harmless. */
+       on_commit_laucher_wakeup = true;
+}
+
+/*
+ * Send SIGUSR1 to the launcher, provided the pid recorded in shared memory
+ * belongs to a live backend.
+ */
+void
+ApplyLauncherWakeup(void)
+{
+       if (!IsBackendPid(LogicalRepCtx->launcher_pid))
+               return;
+
+       kill(LogicalRepCtx->launcher_pid, SIGUSR1);
+}
+
+/*
+ * Main loop for the apply launcher process.
+ *
+ * Each cycle reads the subscription list and starts at most one worker for
+ * an enabled subscription that has none, then sleeps on the process latch.
+ * Worker starts are limited to one per wal_retrieve_retry_interval.  The
+ * loop ends when SIGTERM has been received; main_arg is not used.
+ */
+void
+ApplyLauncherMain(Datum main_arg)
+{
+       /*
+        * Time of the most recent worker start.  This has to live outside the
+        * main loop: if it were reset on every cycle the rate limit below could
+        * never take effect.
+        */
+       TimestampTz     last_start_time = 0;
+
+       ereport(LOG,
+                       (errmsg("logical replication launcher started")));
+
+       /* Establish signal handlers. */
+       pqsignal(SIGTERM, logicalrep_worker_sigterm);
+       BackgroundWorkerUnblockSignals();
+
+       /* Make it easy to identify our processes. */
+       SetConfigOption("application_name", MyBgworkerEntry->bgw_name,
+                                       PGC_USERSET, PGC_S_SESSION);
+
+       /* Publish our pid so that ApplyLauncherWakeup() can signal us. */
+       LogicalRepCtx->launcher_pid = MyProcPid;
+
+       /*
+        * Establish connection to nailed catalogs (we only ever access
+        * pg_subscription).
+        */
+       BackgroundWorkerInitializeConnection(NULL, NULL);
+
+       /* Enter main loop */
+       while (!got_SIGTERM)
+       {
+               int                     rc;
+               List       *sublist;
+               ListCell   *lc;
+               MemoryContext   subctx;
+               MemoryContext   oldctx;
+               TimestampTz             now;
+               long                    wait_time = DEFAULT_NAPTIME_PER_CYCLE;
+
+               now = GetCurrentTimestamp();
+
+               /* Limit the start retry to once a wal_retrieve_retry_interval */
+               if (TimestampDifferenceExceeds(last_start_time, now,
+                                                                          wal_retrieve_retry_interval))
+               {
+                       /* Use temporary context for the subscription list and worker info. */
+                       subctx = AllocSetContextCreate(TopMemoryContext,
+                                                                                  "Logical Replication Launcher sublist",
+                                                                                  ALLOCSET_DEFAULT_MINSIZE,
+                                                                                  ALLOCSET_DEFAULT_INITSIZE,
+                                                                                  ALLOCSET_DEFAULT_MAXSIZE);
+                       oldctx = MemoryContextSwitchTo(subctx);
+
+                       /* Block any concurrent DROP SUBSCRIPTION. */
+                       LWLockAcquire(LogicalRepLauncherLock, LW_EXCLUSIVE);
+
+                       /* search for subscriptions to start or stop. */
+                       sublist = get_subscription_list();
+
+                       /* Start the missing workers for enabled subscriptions. */
+                       foreach(lc, sublist)
+                       {
+                               Subscription       *sub = (Subscription *) lfirst(lc);
+                               LogicalRepWorker   *w;
+
+                               LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+                               w = logicalrep_worker_find(sub->oid);
+                               LWLockRelease(LogicalRepWorkerLock);
+
+                               if (sub->enabled && w == NULL)
+                               {
+                                       logicalrep_worker_launch(sub->dbid, sub->oid, sub->name, sub->owner);
+                                       last_start_time = now;
+                                       wait_time = wal_retrieve_retry_interval;
+                                       /* Limit to one worker per mainloop cycle. */
+                                       break;
+                               }
+                       }
+
+                       LWLockRelease(LogicalRepLauncherLock);
+
+                       /* Switch back to original memory context. */
+                       MemoryContextSwitchTo(oldctx);
+                       /* Clean the temporary memory. */
+                       MemoryContextDelete(subctx);
+               }
+               else
+               {
+                       /*
+                        * The wait in previous cycle was interrupted in less than
+                        * wal_retrieve_retry_interval since last worker was started,
+                        * this usually means crash of the worker, so we should retry
+                        * in wal_retrieve_retry_interval again.
+                        */
+                       wait_time = wal_retrieve_retry_interval;
+               }
+
+               /* Wait for more work. */
+               rc = WaitLatch(&MyProc->procLatch,
+                                          WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                                          wait_time,
+                                          WAIT_EVENT_LOGICAL_LAUNCHER_MAIN);
+
+               /* emergency bailout if postmaster has died */
+               if (rc & WL_POSTMASTER_DEATH)
+                       proc_exit(1);
+
+               ResetLatch(&MyProc->procLatch);
+       }
+
+       LogicalRepCtx->launcher_pid = 0;
+
+       /* ... and if it returns, we're done */
+       ereport(LOG,
+                       (errmsg("logical replication launcher shutting down")));
+
+       proc_exit(0);
+}
+
+/*
+ * Returns state of the subscriptions.
+ *
+ * Set-returning function in materialize mode.  One row is emitted per
+ * worker slot that has a live backend attached.  If the (optional) first
+ * argument is a valid subscription OID, only the row of that subscription's
+ * worker is returned.
+ *
+ * Columns: subscription OID, worker pid, last_lsn, last_send_time,
+ * last_recv_time, reply_lsn, reply_time.  LSN columns are NULL when the
+ * stored LSN is invalid and time columns are NULL when the stored time is 0.
+ */
+Datum
+pg_stat_get_subscription(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_SUBSCRIPTION_COLS  7
+       Oid                     subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
+       int                     i;
+       ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+       TupleDesc       tupdesc;
+       Tuplestorestate *tupstore;
+       MemoryContext per_query_ctx;
+       MemoryContext oldcontext;
+
+       /* check to see if caller supports us returning a tuplestore */
+       if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("set-valued function called in context that cannot accept a set")));
+       if (!(rsinfo->allowedModes & SFRM_Materialize))
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("materialize mode required, but it is not "
+                                               "allowed in this context")));
+
+       /* Build a tuple descriptor for our result type */
+       if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+               elog(ERROR, "return type must be a row type");
+
+       /* The tuplestore is created in the per-query context. */
+       per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+       oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+       tupstore = tuplestore_begin_heap(true, false, work_mem);
+       rsinfo->returnMode = SFRM_Materialize;
+       rsinfo->setResult = tupstore;
+       rsinfo->setDesc = tupdesc;
+
+       MemoryContextSwitchTo(oldcontext);
+
+       /* Make sure we get consistent view of the workers. */
+       LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+       /*
+        * The workers array has exactly max_logical_replication_workers
+        * entries, so the last valid index is one less than that.
+        */
+       for (i = 0; i < max_logical_replication_workers; i++)
+       {
+               /* for each row */
+               Datum           values[PG_STAT_GET_SUBSCRIPTION_COLS];
+               bool            nulls[PG_STAT_GET_SUBSCRIPTION_COLS];
+               int                     worker_pid;
+               LogicalRepWorker        worker;
+
+               /* Work on a local copy of the slot. */
+               memcpy(&worker, &LogicalRepCtx->workers[i],
+                          sizeof(LogicalRepWorker));
+               if (!worker.proc || !IsBackendPid(worker.proc->pid))
+                       continue;
+
+               if (OidIsValid(subid) && worker.subid != subid)
+                       continue;
+
+               worker_pid = worker.proc->pid;
+
+               MemSet(values, 0, sizeof(values));
+               MemSet(nulls, 0, sizeof(nulls));
+
+               values[0] = ObjectIdGetDatum(worker.subid);
+               values[1] = Int32GetDatum(worker_pid);
+               if (XLogRecPtrIsInvalid(worker.last_lsn))
+                       nulls[2] = true;
+               else
+                       values[2] = LSNGetDatum(worker.last_lsn);
+               if (worker.last_send_time == 0)
+                       nulls[3] = true;
+               else
+                       values[3] = TimestampTzGetDatum(worker.last_send_time);
+               if (worker.last_recv_time == 0)
+                       nulls[4] = true;
+               else
+                       values[4] = TimestampTzGetDatum(worker.last_recv_time);
+               if (XLogRecPtrIsInvalid(worker.reply_lsn))
+                       nulls[5] = true;
+               else
+                       values[5] = LSNGetDatum(worker.reply_lsn);
+               if (worker.reply_time == 0)
+                       nulls[6] = true;
+               else
+                       values[6] = TimestampTzGetDatum(worker.reply_time);
+
+               tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+
+               /* If only a single subscription was requested, and we found it, break. */
+               if (OidIsValid(subid))
+                       break;
+       }
+
+       LWLockRelease(LogicalRepWorkerLock);
+
+       /* clean up and return the tuplestore */
+       tuplestore_donestoring(tupstore);
+
+       return (Datum) 0;
+}
diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c
new file mode 100644 (file)
index 0000000..1f30de6
--- /dev/null
@@ -0,0 +1,637 @@
+/*-------------------------------------------------------------------------
+ *
+ * proto.c
+ *             logical replication protocol functions
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             src/backend/replication/logical/proto.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/sysattr.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_type.h"
+#include "libpq/pqformat.h"
+#include "replication/logicalproto.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+/*
+ * Protocol message flags.
+ */
+#define LOGICALREP_IS_REPLICA_IDENTITY 1
+
+static void logicalrep_write_attrs(StringInfo out, Relation rel);
+static void logicalrep_write_tuple(StringInfo out, Relation rel,
+                                                                  HeapTuple tuple);
+
+static void logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel);
+static void logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple);
+
+static void logicalrep_write_namespace(StringInfo out, Oid nspid);
+static const char *logicalrep_read_namespace(StringInfo in);
+
+/*
+ * Write BEGIN to the output stream.
+ *
+ * Message layout: the byte 'B', then the transaction's final_lsn and
+ * commit_time as 64-bit integers, then its xid as a 4-byte integer.
+ * logicalrep_read_begin() reads the fields after the type byte in the
+ * same order.
+ */
+void
+logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn)
+{
+       pq_sendbyte(out, 'B');          /* BEGIN */
+
+       /* fixed fields */
+       pq_sendint64(out, txn->final_lsn);
+       pq_sendint64(out, txn->commit_time);
+       pq_sendint(out, txn->xid, 4);
+}
+
+/*
+ * Read transaction BEGIN from the stream.
+ *
+ * Fills begin_data with final_lsn, commit time and xid, in that order.
+ * An invalid final_lsn is rejected with an ERROR.
+ */
+void
+logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data)
+{
+       begin_data->final_lsn = pq_getmsgint64(in);
+
+       /* A BEGIN message must always carry the final LSN. */
+       if (XLogRecPtrIsInvalid(begin_data->final_lsn))
+               elog(ERROR, "final_lsn not set in begin message");
+
+       begin_data->committime = pq_getmsgint64(in);
+       begin_data->xid = pq_getmsgint(in, 4);
+}
+
+
+/*
+ * Write COMMIT to the output stream.
+ *
+ * Message layout: the byte 'C', a flags byte (always 0 for now), then
+ * commit_lsn, the transaction's end_lsn and its commit_time as 64-bit
+ * integers.
+ */
+void
+logicalrep_write_commit(StringInfo out, ReorderBufferTXN *txn,
+                                               XLogRecPtr commit_lsn)
+{
+       /* message type */
+       pq_sendbyte(out, 'C');
+
+       /* flags: none are defined yet, so this is always zero */
+       pq_sendbyte(out, 0);
+
+       /* payload */
+       pq_sendint64(out, commit_lsn);
+       pq_sendint64(out, txn->end_lsn);
+       pq_sendint64(out, txn->commit_time);
+}
+
+/*
+ * Read transaction COMMIT from the stream.
+ *
+ * The flags byte must be 0, anything else is rejected with an ERROR.
+ * Fills commit_data with commit_lsn, end_lsn and commit time, in that
+ * order.
+ */
+void
+logicalrep_read_commit(StringInfo in, LogicalRepCommitData *commit_data)
+{
+       uint8   msgflags;
+
+       /* No flags are defined yet. */
+       msgflags = pq_getmsgbyte(in);
+       if (msgflags != 0)
+               elog(ERROR, "unknown flags %u in commit message", msgflags);
+
+       commit_data->commit_lsn = pq_getmsgint64(in);
+       commit_data->end_lsn = pq_getmsgint64(in);
+       commit_data->committime = pq_getmsgint64(in);
+}
+
+/*
+ * Write ORIGIN to the output stream.
+ *
+ * Message layout: the byte 'O', origin_lsn as a 64-bit integer, then the
+ * origin name as a string.
+ */
+void
+logicalrep_write_origin(StringInfo out, const char *origin,
+                                               XLogRecPtr origin_lsn)
+{
+       pq_sendbyte(out, 'O');          /* ORIGIN */
+
+       /* fixed fields */
+       pq_sendint64(out, origin_lsn);
+
+       /* origin string */
+       pq_sendstring(out, origin);
+}
+
+/*
+ * Read ORIGIN from the input stream.
+ *
+ * Stores the origin LSN in *origin_lsn and returns a palloc'd copy of the
+ * origin name.
+ */
+char *
+logicalrep_read_origin(StringInfo in, XLogRecPtr *origin_lsn)
+{
+       const char *origin_name;
+
+       /* The LSN comes first, followed by the name. */
+       *origin_lsn = pq_getmsgint64(in);
+       origin_name = pq_getmsgstring(in);
+
+       return pstrdup(origin_name);
+}
+
+/*
+ * Write INSERT to the output stream.
+ *
+ * Message layout: the byte 'I', the relation's OID as a 4-byte integer,
+ * the byte 'N', then the new tuple as written by logicalrep_write_tuple().
+ */
+void
+logicalrep_write_insert(StringInfo out,        Relation rel, HeapTuple newtuple)
+{
+       pq_sendbyte(out, 'I');          /* action INSERT */
+
+       /* Only relations with one of these replica identity settings are expected. */
+       Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
+                  rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+                  rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
+
+       /* use Oid as relation identifier */
+       pq_sendint(out, RelationGetRelid(rel), 4);
+
+       pq_sendbyte(out, 'N');          /* new tuple follows */
+       logicalrep_write_tuple(out, rel, newtuple);
+}
+
+/*
+ * Read INSERT from stream.
+ *
+ * Fills newtup with the new tuple and returns the relation id carried by
+ * the message.  Raises an ERROR if the tuple marker is not 'N'.
+ */
+LogicalRepRelId
+logicalrep_read_insert(StringInfo in, LogicalRepTupleData *newtup)
+{
+       /* The relation id comes first, followed by the tuple marker. */
+       LogicalRepRelId         relid = pq_getmsgint(in, 4);
+       char            marker = pq_getmsgbyte(in);
+
+       if (marker != 'N')
+               elog(ERROR, "expected new tuple but got %d",
+                        marker);
+
+       logicalrep_read_tuple(in, newtup);
+
+       return relid;
+}
+
+/*
+ * Write UPDATE to the output stream.
+ *
+ * Message layout: the byte 'U', the relation's OID as a 4-byte integer,
+ * optionally the old tuple (marked 'O' when the relation's replica
+ * identity is FULL, 'K' otherwise), then 'N' and the new tuple.  The old
+ * tuple part is omitted when oldtuple is NULL.
+ */
+void
+logicalrep_write_update(StringInfo out, Relation rel, HeapTuple oldtuple,
+                                          HeapTuple newtuple)
+{
+       pq_sendbyte(out, 'U');          /* action UPDATE */
+
+       Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
+                  rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+                  rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
+
+       /* the relation is identified by its Oid */
+       pq_sendint(out, RelationGetRelid(rel), 4);
+
+       if (oldtuple != NULL)
+       {
+               bool            full_identity =
+                       (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL);
+
+               /* 'O': whole old tuple follows, 'K': only the old key follows */
+               pq_sendbyte(out, full_identity ? 'O' : 'K');
+               logicalrep_write_tuple(out, rel, oldtuple);
+       }
+
+       /* the new tuple is always sent */
+       pq_sendbyte(out, 'N');
+       logicalrep_write_tuple(out, rel, newtuple);
+}
+
+/*
+ * Read an UPDATE message from the input stream.
+ *
+ * The message carries the relation id, an optional old tuple or old key
+ * (tagged 'O' or 'K') and the new tuple (tagged 'N').  *has_oldtuple tells
+ * the caller whether *oldtup was filled; *newtup is always filled.
+ *
+ * Returns the relation id carried by the message.
+ */
+LogicalRepRelId
+logicalrep_read_update(StringInfo in, bool *has_oldtuple,
+                                          LogicalRepTupleData *oldtup,
+                                          LogicalRepTupleData *newtup)
+{
+       LogicalRepRelId         remote_relid;
+       char            tupkind;
+
+       /* 4-byte relation id comes first */
+       remote_relid = pq_getmsgint(in, 4);
+
+       /* the first tuple tag decides whether an old tuple is present */
+       tupkind = pq_getmsgbyte(in);
+       switch (tupkind)
+       {
+               case 'K':
+               case 'O':
+                       /* old key or old tuple, followed by the next tag */
+                       logicalrep_read_tuple(in, oldtup);
+                       *has_oldtuple = true;
+                       tupkind = pq_getmsgbyte(in);
+                       break;
+               case 'N':
+                       *has_oldtuple = false;
+                       break;
+               default:
+                       elog(ERROR, "expected action 'N', 'O' or 'K', got %c",
+                                tupkind);
+       }
+
+       /* whatever came before, a new tuple has to follow now */
+       if (tupkind != 'N')
+               elog(ERROR, "expected action 'N', got %c",
+                        tupkind);
+
+       logicalrep_read_tuple(in, newtup);
+
+       return remote_relid;
+}
+
+/*
+ * Write a DELETE message to the output stream.
+ *
+ * Message layout: 'D', the relation's Oid as a 4-byte integer, then the old
+ * tuple tagged 'O' for REPLICA IDENTITY FULL or 'K' otherwise.
+ */
+void
+logicalrep_write_delete(StringInfo out, Relation rel, HeapTuple oldtuple)
+{
+       char            tupkind;
+
+       /* replica identity must be DEFAULT, FULL or INDEX */
+       Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
+                  rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+                  rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
+
+       /* message type */
+       pq_sendbyte(out, 'D');
+
+       /* the relation's Oid doubles as its identifier on the wire */
+       pq_sendint(out, RelationGetRelid(rel), 4);
+
+       /* 'O' = whole old tuple follows, 'K' = old key follows */
+       tupkind = (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL) ? 'O' : 'K';
+       pq_sendbyte(out, tupkind);
+
+       logicalrep_write_tuple(out, rel, oldtuple);
+}
+
+/*
+ * Read a DELETE message from the input stream.
+ *
+ * Fills *oldtup with the old tuple (or old key) and returns the relation id
+ * carried by the message.  Raises an ERROR if the tuple is tagged with
+ * anything other than 'K' or 'O'.
+ */
+LogicalRepRelId
+logicalrep_read_delete(StringInfo in, LogicalRepTupleData *oldtup)
+{
+       LogicalRepRelId         remote_relid;
+       char            tupkind;
+
+       /* 4-byte relation id comes first */
+       remote_relid = pq_getmsgint(in, 4);
+
+       /* only an old key or an old tuple may follow */
+       tupkind = pq_getmsgbyte(in);
+       if (!(tupkind == 'K' || tupkind == 'O'))
+               elog(ERROR, "expected action 'O' or 'K', got %c", tupkind);
+
+       logicalrep_read_tuple(in, oldtup);
+
+       return remote_relid;
+}
+
+/*
+ * Write a RELATION message describing rel to the output stream.
+ *
+ * Message layout: 'R', the relation's Oid as a 4-byte integer, the namespace
+ * name, the relation name, the replica identity byte and the attribute
+ * descriptions written by logicalrep_write_attrs().
+ */
+void
+logicalrep_write_rel(StringInfo out, Relation rel)
+{
+       /* message type */
+       pq_sendbyte(out, 'R');
+
+       /* the relation's Oid doubles as its identifier on the wire */
+       pq_sendint(out, RelationGetRelid(rel), 4);
+
+       /* schema-qualified name: namespace first, then the relation name */
+       logicalrep_write_namespace(out, RelationGetNamespace(rel));
+       pq_sendstring(out, RelationGetRelationName(rel));
+
+       /* replica identity setting of the relation */
+       pq_sendbyte(out, rel->rd_rel->relreplident);
+
+       /* per-attribute information */
+       logicalrep_write_attrs(out, rel);
+}
+
+/*
+ * Read a RELATION message from the input stream.
+ *
+ * Returns a freshly palloc'd LogicalRepRelation holding the remote id, copies
+ * of the namespace and relation names, the replica identity byte and the
+ * attribute information filled in by logicalrep_read_attrs().
+ */
+LogicalRepRelation *
+logicalrep_read_rel(StringInfo in)
+{
+       LogicalRepRelation      *remoterel;
+
+       remoterel = palloc(sizeof(LogicalRepRelation));
+
+       /* 4-byte relation id */
+       remoterel->remoteid = pq_getmsgint(in, 4);
+
+       /* schema-qualified name, copied out of the message buffer */
+       remoterel->nspname = pstrdup(logicalrep_read_namespace(in));
+       remoterel->relname = pstrdup(pq_getmsgstring(in));
+
+       /* replica identity setting */
+       remoterel->replident = pq_getmsgbyte(in);
+
+       /* attribute descriptions */
+       logicalrep_read_attrs(in, remoterel);
+
+       return remoterel;
+}
+
+/*
+ * Write a TYPE message to the output stream.
+ *
+ * The id sent is the given typoid, while the namespace and type name are
+ * taken from the pg_type row of its base type (as resolved by
+ * getBaseType()).
+ */
+void
+logicalrep_write_typ(StringInfo out, Oid typoid)
+{
+       Oid                     basetypoid;
+       HeapTuple       typtuple;
+       Form_pg_type typform;
+
+       basetypoid = getBaseType(typoid);
+
+       /* message type */
+       pq_sendbyte(out, 'Y');
+
+       /* fetch the base type's catalog row */
+       typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(basetypoid));
+       if (!HeapTupleIsValid(typtuple))
+               elog(ERROR, "cache lookup failed for type %u", basetypoid);
+       typform = (Form_pg_type) GETSTRUCT(typtuple);
+
+       /* the type's Oid doubles as its identifier on the wire */
+       pq_sendint(out, typoid, 4);
+
+       /* schema-qualified type name */
+       logicalrep_write_namespace(out, typform->typnamespace);
+       pq_sendstring(out, NameStr(typform->typname));
+
+       ReleaseSysCache(typtuple);
+}
+
+/*
+ * Read a TYPE message from the input stream.
+ *
+ * Fills ltyp with the remote type id and pstrdup'd copies of the namespace
+ * and type names.
+ */
+void
+logicalrep_read_typ(StringInfo in, LogicalRepTyp *ltyp)
+{
+       const char *nspname;
+       const char *typname;
+
+       /* 4-byte type id */
+       ltyp->remoteid = pq_getmsgint(in, 4);
+
+       /* Read type name from stream */
+       nspname = logicalrep_read_namespace(in);
+       ltyp->nspname = pstrdup(nspname);
+
+       typname = pq_getmsgstring(in);
+       ltyp->typname = pstrdup(typname);
+}
+
+/*
+ * Write a tuple to the output stream.
+ *
+ * The tuple is sent as a 2-byte count of non-dropped attributes followed by
+ * one item per such attribute:
+ *     'n'                     the column is NULL
+ *     'u'                     varlena column stored externally on disk; no data sent
+ *     't' + length + data     output-function text, including the terminating
+ *                             NUL byte
+ */
+static void
+logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple)
+{
+       TupleDesc       tupdesc = RelationGetDescr(rel);
+       Datum           values[MaxTupleAttributeNumber];
+       bool            isnull[MaxTupleAttributeNumber];
+       uint16          nlive = 0;
+       int                     attidx;
+
+       /* count and announce the attributes that are not dropped */
+       for (attidx = 0; attidx < tupdesc->natts; attidx++)
+       {
+               if (!tupdesc->attrs[attidx]->attisdropped)
+                       nlive++;
+       }
+       pq_sendint(out, nlive, 2);
+
+       /* reserve space up front: tuple length plus 1+4 bytes per attribute */
+       enlargeStringInfo(out, tuple->t_len +
+                                         nlive * (1 + 4));
+
+       heap_deform_tuple(tuple, tupdesc, values, isnull);
+
+       /* now emit one item per live attribute */
+       for (attidx = 0; attidx < tupdesc->natts; attidx++)
+       {
+               Form_pg_attribute att = tupdesc->attrs[attidx];
+               HeapTuple       typtuple;
+               Form_pg_type typform;
+               char       *text;
+               int                     textlen;
+
+               /* dropped columns are not part of the message at all */
+               if (att->attisdropped)
+                       continue;
+
+               /* null column */
+               if (isnull[attidx])
+               {
+                       pq_sendbyte(out, 'n');
+                       continue;
+               }
+
+               /* unchanged toast column */
+               if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[attidx]))
+               {
+                       pq_sendbyte(out, 'u');
+                       continue;
+               }
+
+               /* look up the type to find its output function */
+               typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(att->atttypid));
+               if (!HeapTupleIsValid(typtuple))
+                       elog(ERROR, "cache lookup failed for type %u", att->atttypid);
+               typform = (Form_pg_type) GETSTRUCT(typtuple);
+
+               /* 'text' data follows */
+               pq_sendbyte(out, 't');
+
+               text = OidOutputFunctionCall(typform->typoutput, values[attidx]);
+               textlen = strlen(text) + 1;             /* count the terminating NUL */
+               pq_sendint(out, textlen, 4);
+               appendBinaryStringInfo(out, text, textlen);
+
+               pfree(text);
+
+               ReleaseSysCache(typtuple);
+       }
+}
+
+/*
+ * Read tuple in remote format from stream.
+ *
+ * The message holds a 2-byte attribute count followed by one item per
+ * attribute: 'n' (null), 'u' (unchanged, no data) or 't' (4-byte length
+ * followed by that many bytes of text).  tuple->changed[i] is set for 'n'
+ * and 't' items and left false for 'u' items.
+ *
+ * The returned tuple points into the input stringinfo.
+ *
+ * Raises an ERROR if the attribute count exceeds the capacity of the
+ * per-column arrays in LogicalRepTupleData, or on an unknown item kind.
+ */
+static void
+logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple)
+{
+       int                     i;
+       int                     natts;
+       int                     maxatts;
+
+       /* Get number of attributes. */
+       natts = pq_getmsgint(in, 2);
+
+       /*
+        * The count comes straight off the wire, so verify that it fits the
+        * fixed-size per-column arrays before using it as an index bound.
+        */
+       maxatts = (int) (sizeof(tuple->changed) / sizeof(tuple->changed[0]));
+       if (natts < 0 || natts > maxatts)
+               elog(ERROR, "invalid number of columns in tuple: %d (maximum %d)",
+                        natts, maxatts);
+
+       memset(tuple->changed, 0, sizeof(tuple->changed));
+
+       /* Read the data */
+       for (i = 0; i < natts; i++)
+       {
+               char            kind;
+               int                     len;
+
+               kind = pq_getmsgbyte(in);
+
+               switch (kind)
+               {
+                       case 'n': /* null */
+                               tuple->values[i] = NULL;
+                               tuple->changed[i] = true;
+                               break;
+                       case 'u': /* unchanged column */
+                               tuple->values[i] = (char *) 0xdeadbeef; /* make bad usage more obvious */
+                               break;
+                       case 't': /* text formatted value */
+                               {
+                                       tuple->changed[i] = true;
+
+                                       len = pq_getmsgint(in, 4); /* read length */
+
+                                       /* and data */
+                                       tuple->values[i] = (char *) pq_getmsgbytes(in, len);
+                               }
+                               break;
+                       default:
+                               elog(ERROR, "unknown data representation type '%c'", kind);
+               }
+       }
+}
+
+/*
+ * Write the attribute descriptions of rel to the output stream.
+ *
+ * Sends a 2-byte count of non-dropped attributes, then for each of them a
+ * flags byte (LOGICALREP_IS_REPLICA_IDENTITY if the column is part of the
+ * replica identity), the attribute name, the type id and the type modifier.
+ */
+static void
+logicalrep_write_attrs(StringInfo out, Relation rel)
+{
+       TupleDesc       tupdesc = RelationGetDescr(rel);
+       Bitmapset  *identity_cols = NULL;
+       bool            identity_full;
+       uint16          nlive = 0;
+       int                     attidx;
+
+       /* count and announce the attributes that are not dropped */
+       for (attidx = 0; attidx < tupdesc->natts; attidx++)
+       {
+               if (!tupdesc->attrs[attidx]->attisdropped)
+                       nlive++;
+       }
+       pq_sendint(out, nlive, 2);
+
+       /*
+        * With REPLICA IDENTITY FULL every column counts as a key column, so
+        * the identity bitmap is only needed in the other cases.
+        */
+       identity_full = (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL);
+       if (!identity_full)
+               identity_cols = RelationGetIndexAttrBitmap(rel,
+                                                                                        INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+       /* describe each live attribute */
+       for (attidx = 0; attidx < tupdesc->natts; attidx++)
+       {
+               Form_pg_attribute att = tupdesc->attrs[attidx];
+               uint8           attflags = 0;
+               bool            is_key;
+
+               if (att->attisdropped)
+                       continue;
+
+               /* REPLICA IDENTITY FULL means all columns are sent as part of key. */
+               is_key = identity_full ||
+                       bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
+                                                 identity_cols);
+               if (is_key)
+                       attflags |= LOGICALREP_IS_REPLICA_IDENTITY;
+
+               pq_sendbyte(out, attflags);
+
+               /* name, type id and type modifier, in that order */
+               pq_sendstring(out, NameStr(att->attname));
+               pq_sendint(out, (int) att->atttypid, sizeof(att->atttypid));
+               pq_sendint(out, att->atttypmod, sizeof(att->atttypmod));
+       }
+
+       bms_free(identity_cols);
+}
+
+/*
+ * Read the attribute descriptions of a relation from the input stream.
+ *
+ * Fills rel->natts, rel->attnames (pstrdup'd names), rel->atttyps (type ids)
+ * and rel->attkeys (bitmap of attribute positions flagged as replica
+ * identity columns).  The type modifier is read but discarded.
+ */
+static void
+logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel)
+{
+       Bitmapset  *keycols = NULL;
+       char      **names;
+       Oid                *typeids;
+       int                     count;
+       int                     attidx;
+
+       /* 2-byte attribute count */
+       count = pq_getmsgint(in, 2);
+       names = palloc(count * sizeof(char *));
+       typeids = palloc(count * sizeof(Oid));
+
+       for (attidx = 0; attidx < count; attidx++)
+       {
+               uint8           attflags = pq_getmsgbyte(in);
+
+               /* remember which positions belong to the replica identity */
+               if (attflags & LOGICALREP_IS_REPLICA_IDENTITY)
+                       keycols = bms_add_member(keycols, attidx);
+
+               /* attribute name, copied out of the message buffer */
+               names[attidx] = pstrdup(pq_getmsgstring(in));
+
+               /* attribute type id */
+               typeids[attidx] = (Oid) pq_getmsgint(in, 4);
+
+               /* we ignore attribute mode for now */
+               (void) pq_getmsgint(in, 4);
+       }
+
+       rel->natts = count;
+       rel->attnames = names;
+       rel->atttyps = typeids;
+       rel->attkeys = keycols;
+}
+
+/*
+ * Write a namespace name to the output stream.
+ *
+ * pg_catalog is abbreviated to an empty string (a lone NUL byte) to save
+ * space; any other namespace is sent by name.
+ */
+static void
+logicalrep_write_namespace(StringInfo out, Oid nspid)
+{
+       char       *name;
+
+       /* pg_catalog: send an empty string */
+       if (nspid == PG_CATALOG_NAMESPACE)
+       {
+               pq_sendbyte(out, '\0');
+               return;
+       }
+
+       name = get_namespace_name(nspid);
+       if (name == NULL)
+               elog(ERROR, "cache lookup failed for namespace %u",
+                        nspid);
+
+       pq_sendstring(out, name);
+}
+
+/*
+ * Read a namespace name from the input stream.
+ *
+ * An empty string stands for pg_catalog; anything else is returned as read
+ * from the message.
+ */
+static const char *
+logicalrep_read_namespace(StringInfo in)
+{
+       const char *name = pq_getmsgstring(in);
+
+       return (name[0] == '\0') ? "pg_catalog" : name;
+}
diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c
new file mode 100644 (file)
index 0000000..383c6eb
--- /dev/null
@@ -0,0 +1,489 @@
+/*-------------------------------------------------------------------------
+ * relation.c
+ *        PostgreSQL logical replication
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       src/backend/replication/logical/relation.c
+ *
+ * NOTES
+ *       This file contains helper functions for logical replication relation
+ *       mapping cache.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/sysattr.h"
+#include "catalog/namespace.h"
+#include "nodes/makefuncs.h"
+#include "replication/logicalrelation.h"
+#include "replication/worker_internal.h"
+#include "utils/builtins.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+
+static MemoryContext   LogicalRepRelMapContext = NULL;
+
+static HTAB                       *LogicalRepRelMap = NULL;
+static HTAB                       *LogicalRepTypMap = NULL;
+
+static void logicalrep_typmap_invalidate_cb(Datum arg, int cacheid,
+                                                                                       uint32 hashvalue);
+
+/*
+ * Relcache invalidation callback for our relation map cache.
+ *
+ * For a specific reloid, the first entry mapped to that local relation has
+ * its localreloid reset; for InvalidOid, every entry is reset.  An entry
+ * with an invalid localreloid gets its local mapping rebuilt on the next
+ * logicalrep_rel_open().
+ */
+static void
+logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
+{
+       HASH_SEQ_STATUS scan;
+       LogicalRepRelMapEntry  *ent;
+       bool            reset_all = (reloid == InvalidOid);
+
+       /* Just to be sure. */
+       if (LogicalRepRelMap == NULL)
+               return;
+
+       hash_seq_init(&scan, LogicalRepRelMap);
+
+       /* TODO, use inverse lookup hashtable? */
+       while ((ent = (LogicalRepRelMapEntry *) hash_seq_search(&scan)) != NULL)
+       {
+               if (reset_all)
+               {
+                       /* invalidate all cache entries */
+                       ent->localreloid = InvalidOid;
+                       continue;
+               }
+
+               if (ent->localreloid == reloid)
+               {
+                       /* found the entry; end the scan early */
+                       ent->localreloid = InvalidOid;
+                       hash_seq_term(&scan);
+                       break;
+               }
+       }
+}
+
+/*
+ * Initialize the relation map cache.
+ *
+ * Creates LogicalRepRelMapContext (if it does not exist yet) under
+ * CacheMemoryContext, builds the relation and type hash tables inside it,
+ * and registers the relcache and TYPEOID syscache invalidation callbacks
+ * that reset the cached local mappings.
+ */
+static void
+logicalrep_relmap_init(void)
+{
+       HASHCTL         ctl;
+
+       if (!LogicalRepRelMapContext)
+               LogicalRepRelMapContext =
+                       AllocSetContextCreate(CacheMemoryContext,
+                                                                 "LogicalRepRelMapContext",
+                                                                 ALLOCSET_DEFAULT_SIZES);
+
+       /* Initialize the relation hash table, keyed by remote relation id. */
+       MemSet(&ctl, 0, sizeof(ctl));
+       ctl.keysize = sizeof(LogicalRepRelId);
+       ctl.entrysize = sizeof(LogicalRepRelMapEntry);
+       ctl.hcxt = LogicalRepRelMapContext;
+
+       LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
+                                                                  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+       /* Initialize the type hash table, keyed by remote type Oid. */
+       MemSet(&ctl, 0, sizeof(ctl));
+       ctl.keysize = sizeof(Oid);
+       ctl.entrysize = sizeof(LogicalRepTyp);
+       ctl.hcxt = LogicalRepRelMapContext;
+
+       /* This will usually be small. */
+       LogicalRepTypMap = hash_create("logicalrep type map cache", 2, &ctl,
+                                                                  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+       /* Watch for invalidation events. */
+       CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
+                                                                 (Datum) 0);
+       CacheRegisterSyscacheCallback(TYPEOID, logicalrep_typmap_invalidate_cb,
+                                                                 (Datum) 0);
+}
+
+/*
+ * Release everything a relation map cache entry points to.
+ *
+ * Frees the cached remote relation's names, attribute arrays and key
+ * bitmap as well as the attribute map, and resets the entry's pointers,
+ * attribute count and local relation information.  The hash entry itself
+ * stays in the table.
+ */
+static void
+logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
+{
+       LogicalRepRelation *cached = &entry->remoterel;
+       int                     attidx;
+
+       /* schema-qualified name */
+       pfree(cached->nspname);
+       pfree(cached->relname);
+
+       /* per-attribute data; the arrays are only released when non-empty */
+       if (cached->natts > 0)
+       {
+               for (attidx = 0; attidx < cached->natts; attidx++)
+                       pfree(cached->attnames[attidx]);
+
+               pfree(cached->attnames);
+               pfree(cached->atttyps);
+       }
+       cached->attnames = NULL;
+       cached->atttyps = NULL;
+       cached->natts = 0;
+
+       /* replica identity columns */
+       bms_free(cached->attkeys);
+       cached->attkeys = NULL;
+
+       /* local-side mapping */
+       if (entry->attrmap)
+               pfree(entry->attrmap);
+       entry->attrmap = NULL;
+       entry->localreloid = InvalidOid;
+       entry->localrel = NULL;
+}
+
+/*
+ * Add new entry or update existing entry in the relation map cache.
+ *
+ * Called when new relation mapping is sent by the publisher to update
+ * our expected view of incoming data from said publisher.
+ *
+ * The entry keeps its own copy of remoterel, allocated in
+ * LogicalRepRelMapContext; its local mapping starts out invalid.
+ */
+void
+logicalrep_relmap_update(LogicalRepRelation *remoterel)
+{
+       LogicalRepRelMapEntry  *entry;
+       LogicalRepRelation *cached;
+       MemoryContext           callerctx;
+       bool                                    existed;
+       int                                             natts = remoterel->natts;
+       int                                             attidx;
+
+       if (LogicalRepRelMap == NULL)
+               logicalrep_relmap_init();
+
+       /* Look up the entry for this remote id, creating it if necessary. */
+       entry = hash_search(LogicalRepRelMap, (void *) &remoterel->remoteid,
+                                               HASH_ENTER, &existed);
+
+       /* An existing entry is emptied before being refilled. */
+       if (existed)
+               logicalrep_relmap_free_entry(entry);
+
+       cached = &entry->remoterel;
+
+       /* Make cached copy of the data */
+       callerctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
+
+       cached->remoteid = remoterel->remoteid;
+       cached->nspname = pstrdup(remoterel->nspname);
+       cached->relname = pstrdup(remoterel->relname);
+       cached->natts = natts;
+       cached->attnames = palloc(natts * sizeof(char *));
+       cached->atttyps = palloc(natts * sizeof(Oid));
+       for (attidx = 0; attidx < natts; attidx++)
+       {
+               cached->attnames[attidx] = pstrdup(remoterel->attnames[attidx]);
+               cached->atttyps[attidx] = remoterel->atttyps[attidx];
+       }
+       cached->replident = remoterel->replident;
+       cached->attkeys = bms_copy(remoterel->attkeys);
+
+       /* the local side gets resolved on the next logicalrep_rel_open() */
+       entry->attrmap = NULL;
+       entry->localreloid = InvalidOid;
+
+       MemoryContextSwitchTo(callerctx);
+}
+
+/*
+ * Find the index of an attribute of the remote relation by attribute name.
+ *
+ * Returns the position of the first matching name in remoterel->attnames,
+ * or -1 if not found.
+ */
+static int
+logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname)
+{
+       int     pos = 0;
+
+       while (pos < remoterel->natts)
+       {
+               if (strcmp(remoterel->attnames[pos], attname) == 0)
+                       return pos;
+               pos++;
+       }
+
+       return -1;
+}
+
+/*
+ * Open the local relation associated with the remote one.
+ *
+ * Looks up the map entry for remoteid (ERROR if there is none) and opens
+ * the local relation with the given lockmode, storing it in
+ * entry->localrel.
+ *
+ * If the entry has no valid local relation OID (first use, or reset by the
+ * relcache invalidation callback), the local relation is looked up by the
+ * remote relation's schema-qualified name, and the attribute map and the
+ * "updatable" flag are rebuilt.
+ *
+ * Returns the map entry.
+ */
+LogicalRepRelMapEntry *
+logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
+{
+       LogicalRepRelMapEntry  *entry;
+       bool            found;
+
+       if (LogicalRepRelMap == NULL)
+               logicalrep_relmap_init();
+
+       /* Search for existing entry. */
+       entry = hash_search(LogicalRepRelMap, (void *) &remoteid,
+                                               HASH_FIND, &found);
+
+       if (!found)
+               elog(ERROR, "no relation map entry for remote relation ID %u",
+                        remoteid);
+
+       /* Need to update the local cache? */
+       if (!OidIsValid(entry->localreloid))
+       {
+               Oid                     relid;
+               int                     i;
+               int                     nmatched;
+               Bitmapset  *idkey;
+               TupleDesc       desc;
+               LogicalRepRelation *remoterel;
+               MemoryContext           oldctx;
+
+               remoterel = &entry->remoterel;
+
+               /* Try to find and lock the relation by name. */
+               relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
+                                                                                         remoterel->relname, -1),
+                                                                lockmode, true);
+               if (!OidIsValid(relid))
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                        errmsg("logical replication target relation \"%s.%s\" does not exist",
+                                                       remoterel->nspname, remoterel->relname)));
+               /* The lock was already taken by RangeVarGetRelid() above. */
+               entry->localrel = heap_open(relid, NoLock);
+
+               /* We currently only support writing to regular tables. */
+               if (entry->localrel->rd_rel->relkind != RELKIND_RELATION)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                        errmsg("logical replication target relation \"%s.%s\" is not a table",
+                                                       remoterel->nspname, remoterel->relname)));
+
+               /*
+                * Build the mapping of local attribute numbers to remote attribute
+                * numbers and validate that we don't miss any replicated columns
+                * as that would result in potentially unwanted data loss.
+                */
+               desc = RelationGetDescr(entry->localrel);
+
+               /*
+                * An attribute map built before the entry was invalidated (or
+                * before an earlier attempt failed) may still be attached.  Release
+                * it first, otherwise every rebuild leaks memory in the long-lived
+                * cache context.
+                */
+               if (entry->attrmap)
+               {
+                       pfree(entry->attrmap);
+                       entry->attrmap = NULL;
+               }
+
+               oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
+               entry->attrmap = palloc(desc->natts * sizeof(int));
+               MemoryContextSwitchTo(oldctx);
+
+               nmatched = 0;
+               for (i = 0; i < desc->natts; i++)
+               {
+                       int     attnum = logicalrep_rel_att_by_name(remoterel,
+                                                                                       NameStr(desc->attrs[i]->attname));
+                       entry->attrmap[i] = attnum;
+                       if (attnum >= 0)
+                               nmatched++;
+               }
+
+               /* TODO, detail message with names of missing columns */
+               if (nmatched < remoterel->natts)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                        errmsg("logical replication target relation \"%s.%s\" is missing "
+                                                       "some replicated columns",
+                                                       remoterel->nspname, remoterel->relname)));
+
+               /*
+                * Check that replica identity matches. We allow for stricter replica
+                * identity (fewer columns) on subscriber as that will not stop us
+                * from finding unique tuple. IE, if publisher has identity
+                * (id,timestamp) and subscriber just (id) this will not be a problem,
+                * but in the opposite scenario it will.
+                *
+                * Don't throw any error here just mark the relation entry as not
+                * updatable, as replica identity is only for updates and deletes
+                * but inserts can be replicated even without it.
+                */
+               entry->updatable = true;
+               idkey = RelationGetIndexAttrBitmap(entry->localrel,
+                                                                                  INDEX_ATTR_BITMAP_IDENTITY_KEY);
+               /* fallback to PK if no replica identity */
+               if (idkey == NULL)
+               {
+                       idkey = RelationGetIndexAttrBitmap(entry->localrel,
+                                                                                          INDEX_ATTR_BITMAP_PRIMARY_KEY);
+                       /*
+                        * If no replica identity index and no PK, the published table
+                        * must have replica identity FULL.
+                        */
+                       if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
+                               entry->updatable = false;
+               }
+
+               i = -1;
+               while ((i = bms_next_member(idkey, i)) >= 0)
+               {
+                       int attnum = i + FirstLowInvalidHeapAttributeNumber;
+
+                       if (!AttrNumberIsForUserDefinedAttr(attnum))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                                errmsg("logical replication target relation \"%s.%s\" uses "
+                                                               "system columns in REPLICA IDENTITY index",
+                                                               remoterel->nspname, remoterel->relname)));
+
+                       attnum = AttrNumberGetAttrOffset(attnum);
+
+                       /*
+                        * A local key column that has no remote counterpart is mapped
+                        * to -1.  Test for that before consulting the bitmap, so that
+                        * the entry is marked not updatable instead of handing a
+                        * negative member to bms_is_member().
+                        */
+                       if (entry->attrmap[attnum] < 0 ||
+                               !bms_is_member(entry->attrmap[attnum], remoterel->attkeys))
+                       {
+                               entry->updatable = false;
+                               break;
+                       }
+               }
+
+               entry->localreloid = relid;
+       }
+       else
+               entry->localrel = heap_open(entry->localreloid, lockmode);
+
+       return entry;
+}
+
+/*
+ * Close the local relation of a previously opened map entry.
+ *
+ * The relation is closed with the given lockmode and the entry's localrel
+ * pointer is cleared.
+ */
+void
+logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
+{
+       Relation        localrel = rel->localrel;
+
+       heap_close(localrel, lockmode);
+       rel->localrel = NULL;
+}
+
+
+/*
+ * Type cache invalidation callback for our type map cache.
+ *
+ * Resets the cached local type Oid of every entry, whatever the hash value
+ * reported; logicalrep_typmap_getid() resolves the name again on next use.
+ */
+static void
+logicalrep_typmap_invalidate_cb(Datum arg, int cacheid, uint32 hashvalue)
+{
+       HASH_SEQ_STATUS scan;
+       LogicalRepTyp  *typentry;
+
+       /* Just to be sure. */
+       if (LogicalRepTypMap == NULL)
+               return;
+
+       /* invalidate all cache entries */
+       hash_seq_init(&scan, LogicalRepTypMap);
+
+       for (typentry = (LogicalRepTyp *) hash_seq_search(&scan);
+                typentry != NULL;
+                typentry = (LogicalRepTyp *) hash_seq_search(&scan))
+       {
+               typentry->typoid = InvalidOid;
+       }
+}
+
+/*
+ * Release the data owned by a type map cache entry.
+ *
+ * Frees the namespace and type name strings and marks the local type Oid
+ * as unresolved.  The hash entry itself stays in the table.
+ */
+static void
+logicalrep_typmap_free_entry(LogicalRepTyp *entry)
+{
+       /* forget the local mapping */
+       entry->typoid = InvalidOid;
+
+       /* drop the cached schema-qualified name */
+       pfree(entry->typname);
+       pfree(entry->nspname);
+}
+
+/*
+ * Add new entry or update existing entry in the type map cache.
+ *
+ * The entry keeps its own copies of the namespace and type names, allocated
+ * in LogicalRepRelMapContext; its local type Oid starts out unresolved.
+ */
+void
+logicalrep_typmap_update(LogicalRepTyp *remotetyp)
+{
+       LogicalRepTyp      *typentry;
+       MemoryContext           callerctx;
+       bool                            existed;
+
+       if (LogicalRepTypMap == NULL)
+               logicalrep_relmap_init();
+
+       /* Look up the entry for this remote id, creating it if necessary. */
+       typentry = hash_search(LogicalRepTypMap, (void *) &remotetyp->remoteid,
+                                                  HASH_ENTER, &existed);
+
+       /* An existing entry is emptied before being refilled. */
+       if (existed)
+               logicalrep_typmap_free_entry(typentry);
+
+       /* Make cached copy of the data */
+       typentry->remoteid = remotetyp->remoteid;
+
+       callerctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
+       typentry->nspname = pstrdup(remotetyp->nspname);
+       typentry->typname = pstrdup(remotetyp->typname);
+       MemoryContextSwitchTo(callerctx);
+
+       /* the local type gets resolved on first lookup */
+       typentry->typoid = InvalidOid;
+}
+
+/*
+ * Map a remote type id to the corresponding local type Oid.
+ *
+ * Ids below FirstNormalObjectId are returned unchanged once the type is
+ * confirmed to be defined locally.  Other ids must have a type map entry;
+ * its cached local Oid is returned if valid, otherwise the type is looked
+ * up by namespace and name and the result is cached in the entry.
+ *
+ * Raises an ERROR if the type cannot be resolved.
+ */
+Oid
+logicalrep_typmap_getid(Oid remoteid)
+{
+       LogicalRepTyp      *typentry;
+       Oid                                     nspoid;
+       bool                            found;
+
+       /* Internal types are mapped directly. */
+       if (remoteid < FirstNormalObjectId)
+       {
+               if (!get_typisdefined(remoteid))
+                       ereport(ERROR,
+                                       (errmsg("builtin type %u not found", remoteid),
+                                        errhint("This can be caused by having publisher with "
+                                                        "higher major version than subscriber")));
+               return remoteid;
+       }
+
+       if (LogicalRepTypMap == NULL)
+               logicalrep_relmap_init();
+
+       /* Try finding the mapping. */
+       typentry = hash_search(LogicalRepTypMap, (void *) &remoteid,
+                                                  HASH_FIND, &found);
+
+       if (!found)
+               elog(ERROR, "no type map entry for remote type %u",
+                        remoteid);
+
+       /* Found and mapped, return the oid. */
+       if (OidIsValid(typentry->typoid))
+               return typentry->typoid;
+
+       /* Otherwise, try to map to local type: namespace first, then name. */
+       typentry->typoid = InvalidOid;
+       nspoid = LookupExplicitNamespace(typentry->nspname, true);
+       if (OidIsValid(nspoid))
+               typentry->typoid = GetSysCacheOid2(TYPENAMENSP,
+                                                                                  PointerGetDatum(typentry->typname),
+                                                                                  ObjectIdGetDatum(nspoid));
+
+       if (!OidIsValid(typentry->typoid))
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("data type \"%s.%s\" required for logical replication does not exist",
+                                               typentry->nspname, typentry->typname)));
+
+       return typentry->typoid;
+}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
new file mode 100644 (file)
index 0000000..7d86736
--- /dev/null
@@ -0,0 +1,1429 @@
+/*-------------------------------------------------------------------------
+ * worker.c
+ *        PostgreSQL logical replication worker (apply)
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       src/backend/replication/logical/worker.c
+ *
+ * NOTES
+ *       This file contains the worker which applies logical changes as they come
+ *       from remote logical replication stream.
+ *
+ *       The main worker (apply) is started by logical replication worker
+ *       launcher for every enabled subscription in a database. It uses
+ *       walsender protocol to communicate with publisher.
+ *
+ *       The apply worker may spawn additional workers (sync) for initial data
+ *       synchronization of tables.
+ *
+ *       This module includes server facing code and shares libpqwalreceiver
+ *       module with walreceiver for providing the libpq specific functionality.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "funcapi.h"
+
+#include "access/xact.h"
+#include "access/xlog_internal.h"
+
+#include "catalog/namespace.h"
+#include "catalog/pg_subscription.h"
+
+#include "commands/trigger.h"
+
+#include "executor/executor.h"
+#include "executor/nodeModifyTable.h"
+
+#include "libpq/pqformat.h"
+#include "libpq/pqsignal.h"
+
+#include "mb/pg_wchar.h"
+
+#include "nodes/makefuncs.h"
+
+#include "optimizer/planner.h"
+
+#include "parser/parse_relation.h"
+
+#include "postmaster/bgworker.h"
+#include "postmaster/postmaster.h"
+
+#include "replication/decode.h"
+#include "replication/logical.h"
+#include "replication/logicalproto.h"
+#include "replication/logicalrelation.h"
+#include "replication/logicalworker.h"
+#include "replication/reorderbuffer.h"
+#include "replication/origin.h"
+#include "replication/snapbuild.h"
+#include "replication/walreceiver.h"
+#include "replication/worker_internal.h"
+
+#include "rewrite/rewriteHandler.h"
+
+#include "storage/bufmgr.h"
+#include "storage/ipc.h"
+#include "storage/lmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/datum.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/timeout.h"
+#include "utils/tqual.h"
+#include "utils/syscache.h"
+
+#define NAPTIME_PER_CYCLE 1000 /* max sleep time between cycles (1s) */
+
+/*
+ * One entry of the local/remote LSN mapping list (lsn_mapping).
+ *
+ * store_flush_position() appends an entry after each applied commit and
+ * get_flush_position() removes entries whose local_end has been flushed.
+ */
+typedef struct FlushPosition
+{
+       /* list link within lsn_mapping */
+       dlist_node node;
+       /* local position recorded at commit (taken from XactLastCommitEnd) */
+       XLogRecPtr local_end;
+       /* remote position passed in by the commit handler for the same commit */
+       XLogRecPtr remote_end;
+} FlushPosition;
+
+/* List of FlushPosition entries; new entries are pushed at the tail. */
+static dlist_head lsn_mapping = DLIST_STATIC_INIT(lsn_mapping);
+
+/*
+ * Argument handed to slot_store_error_callback() through the error context
+ * stack while remote column values are being converted.
+ */
+typedef struct SlotErrCallbackArg
+{
+       /* remote relation description the values belong to */
+       LogicalRepRelation      *rel;
+       /* remote attribute currently being converted; -1 means "none yet" */
+       int                     attnum;
+} SlotErrCallbackArg;
+
+/*
+ * ApplyContext is created in ApplyLoop() under TopMemoryContext and is the
+ * context the message handlers switch to (see ensure_transaction()).
+ *
+ * ApplyCacheContext is used by store_flush_position() for data that must
+ * survive across transactions; NOTE(review): it is not created in this part
+ * of the file -- confirm where it is set up.
+ */
+static MemoryContext   ApplyContext = NULL;
+static MemoryContext   ApplyCacheContext = NULL;
+
+/* Connection handle passed to walrcv_receive() in ApplyLoop(). */
+WalReceiverConn           *wrconn = NULL;
+
+/*
+ * Presumably the subscription this worker applies -- TODO confirm.  When
+ * MySubscriptionValid is false, ensure_transaction() calls
+ * reread_subscription() right after starting a transaction.
+ */
+Subscription      *MySubscription = NULL;
+bool                           MySubscriptionValid = false;
+
+/* Set by apply_handle_begin(), cleared by apply_handle_commit(). */
+bool                           in_remote_transaction = false;
+
+static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply);
+
+static void store_flush_position(XLogRecPtr remote_lsn);
+
+static void reread_subscription(void);
+
+/*
+ * Guarantee that a local transaction is open and that ApplyContext is the
+ * current memory context.
+ *
+ * Returns true when this call had to start the transaction, false when one
+ * was already in progress.  A freshly started transaction also re-reads the
+ * subscription if it has been marked invalid.
+ */
+static bool
+ensure_transaction(void)
+{
+       bool            started = !IsTransactionState();
+
+       if (started)
+       {
+               StartTransactionCommand();
+
+               if (!MySubscriptionValid)
+                       reread_subscription();
+       }
+
+       /* After a fresh start we always switch; otherwise only if needed. */
+       if (started || CurrentMemoryContext != ApplyContext)
+               MemoryContextSwitchTo(ApplyContext);
+
+       return started;
+}
+
+
+/*
+ * Build a minimal executor state for the local target relation, sufficient
+ * for evaluating constraint expressions, maintaining indexes and firing
+ * triggers.
+ *
+ * The approach follows similar code in copy.c.  The caller owns the returned
+ * EState.
+ */
+static EState *
+create_estate_for_relation(LogicalRepRelMapEntry *rel)
+{
+       EState     *es;
+       RangeTblEntry *rangeent;
+       ResultRelInfo *rri;
+
+       es = CreateExecutorState();
+
+       /* Single-entry range table describing the local relation. */
+       rangeent = makeNode(RangeTblEntry);
+       rangeent->rtekind = RTE_RELATION;
+       rangeent->relid = RelationGetRelid(rel->localrel);
+       rangeent->relkind = rel->localrel->rd_rel->relkind;
+       es->es_range_table = list_make1(rangeent);
+
+       /* The local relation is the one and only result relation. */
+       rri = makeNode(ResultRelInfo);
+       InitResultRelInfo(rri, rel->localrel, 1, NULL, 0);
+
+       es->es_result_relations = rri;
+       es->es_num_result_relations = 1;
+       es->es_result_relation_info = rri;
+
+       /* A trigger tuple slot is only set up when the relation has triggers. */
+       if (rri->ri_TrigDesc)
+               es->es_trig_tuple_slot = ExecInitExtraTupleSlot(es);
+
+       return es;
+}
+
+/*
+ * Evaluate column defaults for local columns that have no counterpart in the
+ * remote relation and store the results into the slot.
+ *
+ * This is what lets the downstream table carry more columns than the
+ * upstream one.  Dropped columns and columns mapped to a remote attribute
+ * are left untouched, as are columns without a default expression.
+ */
+static void
+slot_fill_defaults(LogicalRepRelMapEntry *rel, EState *estate,
+                                TupleTableSlot *slot)
+{
+       TupleDesc       tupdesc = RelationGetDescr(rel->localrel);
+       int                     ncols = tupdesc->natts;
+       ExprContext *per_tuple_ctx = GetPerTupleExprContext(estate);
+       int                *target_cols;
+       ExprState **exprs;
+       int                     nexprs = 0;
+       int                     col;
+       int                     k;
+
+       /* Same column count on both sides: nothing is left to compute. */
+       if (ncols == rel->remoterel.natts)
+               return;
+
+       target_cols = (int *) palloc(ncols * sizeof(int));
+       exprs = (ExprState **) palloc(ncols * sizeof(ExprState *));
+
+       /* First pass: collect and prepare the default expressions. */
+       for (col = 0; col < ncols; col++)
+       {
+               Expr       *dflt;
+
+               if (tupdesc->attrs[col]->attisdropped || rel->attrmap[col] >= 0)
+                       continue;
+
+               dflt = (Expr *) build_column_default(rel->localrel, col + 1);
+               if (dflt == NULL)
+                       continue;
+
+               /* Plan the expression, then make it executable. */
+               dflt = expression_planner(dflt);
+               exprs[nexprs] = ExecInitExpr(dflt, NULL);
+               target_cols[nexprs] = col;
+               nexprs++;
+       }
+
+       /* Second pass: evaluate each default into its target column. */
+       for (k = 0; k < nexprs; k++)
+       {
+               int                     dst = target_cols[k];
+
+               slot->tts_values[dst] =
+                       ExecEvalExpr(exprs[k], per_tuple_ctx, &slot->tts_isnull[dst]);
+       }
+}
+
+/*
+ * Error context callback used while converting remote column data.
+ *
+ * Adds the target relation, the column name and both the remote and the
+ * local type to the error context.  Does nothing while no attribute has
+ * been selected yet (attnum < 0).
+ */
+static void
+slot_store_error_callback(void *arg)
+{
+       SlotErrCallbackArg         *info = (SlotErrCallbackArg *) arg;
+       LogicalRepRelation         *remote;
+       Oid             remote_type;
+       Oid             local_type;
+
+       if (info->attnum < 0)
+               return;
+
+       remote = info->rel;
+       remote_type = remote->atttyps[info->attnum];
+       local_type = logicalrep_typmap_getid(remote_type);
+
+       errcontext("processing remote data for replication target relation \"%s.%s\" column \"%s\", "
+                          "remote type %s, local type %s",
+                          remote->nspname, remote->relname,
+                          remote->attnames[info->attnum],
+                          format_type_be(remote_type),
+                          format_type_be(local_type));
+}
+
+/*
+ * Fill a slot from the C string representation of a remote tuple.
+ *
+ * Comparable to BuildTupleFromCStrings, but working on a TupleTableSlot
+ * suits the apply code better.  Each local column that maps to a non-NULL
+ * remote value is run through its type input function; every other column
+ * (dropped, unmapped, or NULL on the remote side) is stored as NULL.
+ * Unmapped columns are expected to be filled later by slot_fill_defaults.
+ */
+static void
+slot_store_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
+                                 char **values)
+{
+       int             ncols = slot->tts_tupleDescriptor->natts;
+       int             col;
+       SlotErrCallbackArg              cbarg;
+       ErrorContextCallback    cbframe;
+
+       ExecClearTuple(slot);
+
+       /* Install the conversion error context; no attribute selected yet. */
+       cbarg.rel = &rel->remoterel;
+       cbarg.attnum = -1;
+       cbframe.callback = slot_store_error_callback;
+       cbframe.arg = (void *) &cbarg;
+       cbframe.previous = error_context_stack;
+       error_context_stack = &cbframe;
+
+       for (col = 0; col < ncols; col++)
+       {
+               Form_pg_attribute       attr = slot->tts_tupleDescriptor->attrs[col];
+               int                                     srccol = rel->attrmap[col];
+               Oid                                     infunc;
+               Oid                                     ioparam;
+
+               /* Dropped, unmapped, or remote NULL: store NULL and move on. */
+               if (attr->attisdropped || srccol < 0 || values[srccol] == NULL)
+               {
+                       slot->tts_values[col] = (Datum) 0;
+                       slot->tts_isnull[col] = true;
+                       continue;
+               }
+
+               /* Tell the error callback which remote column is in flight. */
+               cbarg.attnum = srccol;
+
+               getTypeInputInfo(attr->atttypid, &infunc, &ioparam);
+               slot->tts_values[col] = OidInputFunctionCall(infunc,
+                                                                                                        values[srccol],
+                                                                                                        ioparam,
+                                                                                                        attr->atttypmod);
+               slot->tts_isnull[col] = false;
+       }
+
+       /* Remove our error context again. */
+       error_context_stack = cbframe.previous;
+
+       ExecStoreVirtualTuple(slot);
+}
+
+/*
+ * Modify slot with user data provided as C strings.
+ * This is somewhat similar to heap_modify_tuple but also calls the type
+ * input function on the user data as the input is the text representation
+ * of the types.
+ *
+ * "values" and "replaces" are indexed by remote attribute number; the local
+ * to remote translation goes through rel->attrmap.  Local columns that have
+ * no remote counterpart (attrmap < 0) and remote columns not flagged in
+ * "replaces" keep the value already present in the slot.
+ */
+static void
+slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
+                                  char **values, bool *replaces)
+{
+       int             natts = slot->tts_tupleDescriptor->natts;
+       int             i;
+       SlotErrCallbackArg              errarg;
+       ErrorContextCallback    errcallback;
+
+       /*
+        * Extract all current column values first, so that columns we do not
+        * replace below retain them when the slot is stored as a virtual tuple.
+        */
+       slot_getallattrs(slot);
+       ExecClearTuple(slot);
+
+       /* Push callback + info on the error context stack */
+       errarg.rel = &rel->remoterel;
+       errarg.attnum = -1;
+       errcallback.callback = slot_store_error_callback;
+       errcallback.arg = (void *) &errarg;
+       errcallback.previous = error_context_stack;
+       error_context_stack = &errcallback;
+
+       /* Call the "in" function for each replaced attribute */
+       for (i = 0; i < natts; i++)
+       {
+               Form_pg_attribute       att = slot->tts_tupleDescriptor->attrs[i];
+               int                                     remoteattnum = rel->attrmap[i];
+
+               /* No remote counterpart: leave the existing local value alone. */
+               if (remoteattnum < 0)
+                       continue;
+
+               /* Publisher did not send a new value for this column. */
+               if (!replaces[remoteattnum])
+                       continue;
+
+               if (values[remoteattnum] != NULL)
+               {
+                       Oid typinput;
+                       Oid typioparam;
+
+                       errarg.attnum = remoteattnum;
+
+                       getTypeInputInfo(att->atttypid, &typinput, &typioparam);
+                       /* The value array is indexed by remote attribute number. */
+                       slot->tts_values[i] = OidInputFunctionCall(typinput,
+                                                                                                          values[remoteattnum],
+                                                                                                          typioparam,
+                                                                                                          att->atttypmod);
+                       slot->tts_isnull[i] = false;
+               }
+               else
+               {
+                       slot->tts_values[i] = (Datum) 0;
+                       slot->tts_isnull[i] = true;
+               }
+       }
+
+       /* Pop the error context stack */
+       error_context_stack = errcallback.previous;
+
+       ExecStoreVirtualTuple(slot);
+}
+
+/*
+ * Handle BEGIN message.
+ *
+ * Records the remote transaction's final LSN and commit time as the session
+ * replication origin position and flags that a remote transaction is open.
+ */
+static void
+apply_handle_begin(StringInfo s)
+{
+       LogicalRepBeginData     msg;
+
+       logicalrep_read_begin(s, &msg);
+
+       in_remote_transaction = true;
+
+       replorigin_session_origin_lsn = msg.final_lsn;
+       replorigin_session_origin_timestamp = msg.committime;
+
+       pgstat_report_activity(STATE_RUNNING, NULL);
+}
+
+/*
+ * Handle COMMIT message.
+ *
+ * Commits the local transaction, if one was started for this remote
+ * transaction, and remembers the remote end position for flush feedback.
+ *
+ * TODO, support tracking of multiple origins
+ */
+static void
+apply_handle_commit(StringInfo s)
+{
+       LogicalRepCommitData    msg;
+
+       logicalrep_read_commit(s, &msg);
+
+       /* The commit must match what BEGIN announced. */
+       Assert(msg.commit_lsn == replorigin_session_origin_lsn);
+       Assert(msg.committime == replorigin_session_origin_timestamp);
+
+       /* No local transaction is open if no change handler started one. */
+       if (IsTransactionState())
+       {
+               CommitTransactionCommand();
+               store_flush_position(msg.end_lsn);
+       }
+
+       in_remote_transaction = false;
+       pgstat_report_activity(STATE_IDLE, NULL);
+}
+
+/*
+ * Handle ORIGIN message.
+ *
+ * The message content is not used; only its position in the stream is
+ * validated.
+ *
+ * TODO, support tracking of multiple origins
+ */
+static void
+apply_handle_origin(StringInfo s)
+{
+       /*
+        * The only legal place for ORIGIN is inside a remote transaction, ahead
+        * of any actual writes (i.e. before a local transaction was started).
+        */
+       if (in_remote_transaction && !IsTransactionState())
+               return;
+
+       ereport(ERROR,
+                       (errcode(ERRCODE_PROTOCOL_VIOLATION),
+                        errmsg("ORIGIN message sent out of order")));
+}
+
+/*
+ * Handle RELATION message.
+ *
+ * The remote relation description is only recorded in the relation map
+ * here.  Checking it against the local schema is deferred until the first
+ * change for that relation arrives: that is the only time it matters, and
+ * it means less locking.
+ */
+static void
+apply_handle_relation(StringInfo s)
+{
+       LogicalRepRelation  *remote = logicalrep_read_rel(s);
+
+       logicalrep_relmap_update(remote);
+}
+
+/*
+ * Handle TYPE message.
+ *
+ * Only the remote type description is stored; resolving it to a local type
+ * happens when the type is actually used.
+ */
+static void
+apply_handle_type(StringInfo s)
+{
+       LogicalRepTyp   remote_type;
+
+       logicalrep_read_typ(s, &remote_type);
+
+       logicalrep_typmap_update(&remote_type);
+}
+
+/*
+ * Return the OID of the relation's replica identity index, falling back to
+ * the primary key index when no replica identity index is defined.
+ *
+ * Returns InvalidOid if the relation has neither.
+ */
+static Oid
+GetRelationIdentityOrPK(Relation rel)
+{
+       Oid     replident = RelationGetReplicaIndex(rel);
+
+       if (OidIsValid(replident))
+               return replident;
+
+       return RelationGetPrimaryKeyIndex(rel);
+}
+
+/*
+ * Handle INSERT message.
+ *
+ * Converts the remote tuple to the local row format, fills defaults for
+ * local-only columns and inserts the row into the target relation.
+ */
+static void
+apply_handle_insert(StringInfo s)
+{
+       LogicalRepRelMapEntry *relentry;
+       LogicalRepTupleData     tupdata;
+       EState                     *es;
+       TupleTableSlot     *tupslot;
+       MemoryContext           saved_ctx;
+
+       ensure_transaction();
+
+       /* Decode the message and open the mapped local relation. */
+       relentry = logicalrep_rel_open(logicalrep_read_insert(s, &tupdata),
+                                                                  RowExclusiveLock);
+
+       /* Executor state plus one slot shaped like the local relation. */
+       es = create_estate_for_relation(relentry);
+       tupslot = ExecInitExtraTupleSlot(es);
+       ExecSetSlotDescriptor(tupslot, RelationGetDescr(relentry->localrel));
+
+       /* Convert the remote values in per-tuple memory. */
+       saved_ctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(es));
+       slot_store_cstrings(tupslot, relentry, tupdata.values);
+       slot_fill_defaults(relentry, es, tupslot);
+       MemoryContextSwitchTo(saved_ctx);
+
+       PushActiveSnapshot(GetTransactionSnapshot());
+       ExecOpenIndices(es->es_result_relation_info, false);
+
+       /* Perform the insert itself. */
+       ExecSimpleRelationInsert(es, tupslot);
+
+       /* Tear everything down in reverse order of setup. */
+       ExecCloseIndices(es->es_result_relation_info);
+       PopActiveSnapshot();
+       ExecResetTupleTable(es->es_tupleTable, false);
+       FreeExecutorState(es);
+
+       /* NoLock: the lock taken at open time is not released here. */
+       logicalrep_rel_close(relentry, NoLock);
+
+       CommandCounterIncrement();
+}
+
+/*
+ * Check if the logical replication relation is updatable and throw
+ * appropriate error if it isn't.
+ *
+ * Returns normally only when rel->updatable is set.  Otherwise one of two
+ * errors is raised, depending on whether the local relation has a replica
+ * identity index or primary key.
+ */
+static void
+check_relation_updatable(LogicalRepRelMapEntry *rel)
+{
+       /* Updatable, no error. */
+       if (rel->updatable)
+               return;
+
+       /*
+        * We are in error mode so it's fine this is somewhat slow.
+        * It's better to give user correct error.
+        */
+       if (OidIsValid(GetRelationIdentityOrPK(rel->localrel)))
+       {
+               /* A local key exists, so the publisher's data is what is lacking. */
+               ereport(ERROR,
+                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                errmsg("publisher does not send replica identity column "
+                                               "expected by the logical replication target relation \"%s.%s\"",
+                                               rel->remoterel.nspname, rel->remoterel.relname)));
+       }
+
+       /* No usable local key at all. */
+       ereport(ERROR,
+                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                        errmsg("logical replication target relation \"%s.%s\" has "
+                                       "neither REPLICA IDENTITY index nor PRIMARY "
+                                       "KEY and published relation does not have "
+                                       "REPLICA IDENTITY FULL",
+                                       rel->remoterel.nspname, rel->remoterel.relname)));
+}
+
+/*
+ * Handle UPDATE message.
+ *
+ * Reads the update from the message, locates the matching local row (via
+ * the replica identity index or primary key when one exists, otherwise by
+ * sequential scan) and applies the new column values to it.  When no
+ * matching row is found, nothing is changed and only a DEBUG1 message is
+ * emitted.
+ *
+ * TODO: FDW support
+ */
+static void
+apply_handle_update(StringInfo s)
+{
+       LogicalRepRelMapEntry *rel;
+       LogicalRepRelId         relid;
+       Oid                                     idxoid;
+       EState                     *estate;
+       EPQState                        epqstate;
+       LogicalRepTupleData     oldtup;
+       LogicalRepTupleData     newtup;
+       bool                            has_oldtup;
+       TupleTableSlot     *localslot;
+       TupleTableSlot     *remoteslot;
+       bool                            found;
+       MemoryContext           oldctx;
+
+       ensure_transaction();
+
+       relid = logicalrep_read_update(s, &has_oldtup, &oldtup,
+                                                                  &newtup);
+       rel = logicalrep_rel_open(relid, RowExclusiveLock);
+
+       /* Check if we can do the update. */
+       check_relation_updatable(rel);
+
+       /*
+        * Initialize the executor state.  Two slots with the local row shape are
+        * used: remoteslot first carries the search tuple and later the updated
+        * row, localslot receives the row found in the local table.
+        */
+       estate = create_estate_for_relation(rel);
+       remoteslot = ExecInitExtraTupleSlot(estate);
+       ExecSetSlotDescriptor(remoteslot, RelationGetDescr(rel->localrel));
+       localslot = ExecInitExtraTupleSlot(estate);
+       ExecSetSlotDescriptor(localslot, RelationGetDescr(rel->localrel));
+       EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
+
+       PushActiveSnapshot(GetTransactionSnapshot());
+       ExecOpenIndices(estate->es_result_relation_info, false);
+
+       /*
+        * Build the search tuple.  The old tuple is used when the publisher sent
+        * one; otherwise the new tuple's values serve as the search key.
+        */
+       oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+       slot_store_cstrings(remoteslot, rel,
+                                               has_oldtup ? oldtup.values : newtup.values);
+       MemoryContextSwitchTo(oldctx);
+
+       /*
+        * Try to find tuple using either replica identity index, primary key
+        * or if needed, sequential scan.
+        */
+       idxoid = GetRelationIdentityOrPK(rel->localrel);
+       Assert(OidIsValid(idxoid) ||
+                  (rel->remoterel.replident == REPLICA_IDENTITY_FULL && has_oldtup));
+
+       if (OidIsValid(idxoid))
+               found = RelationFindReplTupleByIndex(rel->localrel, idxoid,
+                                                                                        LockTupleExclusive,
+                                                                                        remoteslot, localslot);
+       else
+               found = RelationFindReplTupleSeq(rel->localrel, LockTupleExclusive,
+                                                                                remoteslot, localslot);
+
+       /* The search tuple is done with; remoteslot is reused below. */
+       ExecClearTuple(remoteslot);
+
+       /*
+        * Tuple found.
+        *
+        * Note this will fail if there are other conflicting unique indexes.
+        */
+       if (found)
+       {
+               /*
+                * Process and store remote tuple in the slot: seed remoteslot with
+                * the local row that was found, then overwrite the columns flagged
+                * in newtup.changed with the values sent by the publisher.
+                */
+               oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+               ExecStoreTuple(localslot->tts_tuple, remoteslot, InvalidBuffer, false);
+               slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed);
+               MemoryContextSwitchTo(oldctx);
+
+               EvalPlanQualSetSlot(&epqstate, remoteslot);
+
+               /* Do the actual update. */
+               ExecSimpleRelationUpdate(estate, &epqstate, localslot, remoteslot);
+       }
+       else
+       {
+               /*
+                * The tuple to be updated could not be found.
+                *
+                * TODO what to do here, change the log level to LOG perhaps?
+                */
+               elog(DEBUG1,
+                        "logical replication did not find row for update "
+                        "in replication target relation \"%s\"",
+                        RelationGetRelationName(rel->localrel));
+       }
+
+       /* Cleanup. */
+       ExecCloseIndices(estate->es_result_relation_info);
+       PopActiveSnapshot();
+       EvalPlanQualEnd(&epqstate);
+       ExecResetTupleTable(estate->es_tupleTable, false);
+       FreeExecutorState(estate);
+
+       /* NoLock: the lock taken at open time is not released here. */
+       logicalrep_rel_close(rel, NoLock);
+
+       CommandCounterIncrement();
+}
+
+/*
+ * Handle DELETE message.
+ *
+ * Reads the old tuple from the message, locates the matching local row (via
+ * the replica identity index or primary key when one exists, otherwise by
+ * sequential scan) and deletes it.  When no matching row is found, nothing
+ * is changed and only a DEBUG1 message is emitted.
+ *
+ * TODO: FDW support
+ */
+static void
+apply_handle_delete(StringInfo s)
+{
+       LogicalRepRelMapEntry *rel;
+       LogicalRepTupleData     oldtup;
+       LogicalRepRelId         relid;
+       Oid                                     idxoid;
+       EState                     *estate;
+       EPQState                        epqstate;
+       TupleTableSlot     *remoteslot;
+       TupleTableSlot     *localslot;
+       bool                            found;
+       MemoryContext           oldctx;
+
+       ensure_transaction();
+
+       relid = logicalrep_read_delete(s, &oldtup);
+       rel = logicalrep_rel_open(relid, RowExclusiveLock);
+
+       /* Check if we can do the delete. */
+       check_relation_updatable(rel);
+
+       /*
+        * Initialize the executor state.  remoteslot carries the search tuple,
+        * localslot receives the row found in the local table.
+        */
+       estate = create_estate_for_relation(rel);
+       remoteslot = ExecInitExtraTupleSlot(estate);
+       ExecSetSlotDescriptor(remoteslot, RelationGetDescr(rel->localrel));
+       localslot = ExecInitExtraTupleSlot(estate);
+       ExecSetSlotDescriptor(localslot, RelationGetDescr(rel->localrel));
+       EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
+
+       PushActiveSnapshot(GetTransactionSnapshot());
+       ExecOpenIndices(estate->es_result_relation_info, false);
+
+       /* Build the search tuple from the old tuple sent by the publisher. */
+       oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+       slot_store_cstrings(remoteslot, rel, oldtup.values);
+       MemoryContextSwitchTo(oldctx);
+
+       /*
+        * Try to find tuple using either replica identity index, primary key
+        * or if needed, sequential scan.
+        */
+       idxoid = GetRelationIdentityOrPK(rel->localrel);
+       Assert(OidIsValid(idxoid) ||
+                  (rel->remoterel.replident == REPLICA_IDENTITY_FULL));
+
+       if (OidIsValid(idxoid))
+               found = RelationFindReplTupleByIndex(rel->localrel, idxoid,
+                                                                                        LockTupleExclusive,
+                                                                                        remoteslot, localslot);
+       else
+               found = RelationFindReplTupleSeq(rel->localrel, LockTupleExclusive,
+                                                                                remoteslot, localslot);
+
+       /* If found delete it. */
+       if (found)
+       {
+               EvalPlanQualSetSlot(&epqstate, localslot);
+
+               /* Do the actual delete. */
+               ExecSimpleRelationDelete(estate, &epqstate, localslot);
+       }
+       else
+       {
+               /*
+                * The tuple to be deleted could not be found.  The wording and
+                * quoting match the message used by the UPDATE handler.
+                */
+               ereport(DEBUG1,
+                               (errmsg("logical replication could not find row for delete "
+                                               "in replication target relation \"%s\"",
+                                               RelationGetRelationName(rel->localrel))));
+       }
+
+       /* Cleanup. */
+       ExecCloseIndices(estate->es_result_relation_info);
+       PopActiveSnapshot();
+       EvalPlanQualEnd(&epqstate);
+       ExecResetTupleTable(estate->es_tupleTable, false);
+       FreeExecutorState(estate);
+
+       /* NoLock: the lock taken at open time is not released here. */
+       logicalrep_rel_close(rel, NoLock);
+
+       CommandCounterIncrement();
+}
+
+
+/*
+ * Logical replication protocol message dispatcher.
+ *
+ * Consumes the leading message type byte and hands the remainder of the
+ * message to the matching handler.  An unknown type byte is reported as a
+ * protocol violation.
+ */
+static void
+apply_dispatch(StringInfo s)
+{
+       char msgtype = pq_getmsgbyte(s);
+
+       switch (msgtype)
+       {
+               /* Transaction control: BEGIN, COMMIT, ORIGIN */
+               case 'B':
+                       apply_handle_begin(s);
+                       return;
+               case 'C':
+                       apply_handle_commit(s);
+                       return;
+               case 'O':
+                       apply_handle_origin(s);
+                       return;
+
+               /* Metadata: RELATION, TYPE */
+               case 'R':
+                       apply_handle_relation(s);
+                       return;
+               case 'Y':
+                       apply_handle_type(s);
+                       return;
+
+               /* Data changes: INSERT, UPDATE, DELETE */
+               case 'I':
+                       apply_handle_insert(s);
+                       return;
+               case 'U':
+                       apply_handle_update(s);
+                       return;
+               case 'D':
+                       apply_handle_delete(s);
+                       return;
+
+               default:
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_PROTOCOL_VIOLATION),
+                                        errmsg("invalid logical replication message type %c", msgtype)));
+       }
+}
+
+/*
+ * Work out the write and flush positions to report to the walsender.
+ *
+ * Reporting the last LSN received is not enough, because the corresponding
+ * local transaction may not have reached disk yet.  lsn_mapping pairs the
+ * local and remote end position of every applied commit.  We walk it from
+ * the head, report (and discard) every entry whose local position is
+ * already flushed, and stop at the first one that is not.
+ *
+ * *write receives the remote position of the last entry seen, or of the
+ * list tail when we stop early.  *flush receives the remote position of the
+ * last flushed entry.  Both are InvalidXLogRecPtr when there is nothing to
+ * report.  *have_pending_txes tells whether unflushed transactions remain.
+ */
+static void
+get_flush_position(XLogRecPtr *write, XLogRecPtr *flush,
+                                  bool *have_pending_txes)
+{
+       XLogRecPtr      flushed_upto = GetFlushRecPtr();
+       dlist_mutable_iter it;
+
+       *write = InvalidXLogRecPtr;
+       *flush = InvalidXLogRecPtr;
+
+       dlist_foreach_modify(it, &lsn_mapping)
+       {
+               FlushPosition *entry =
+                       dlist_container(FlushPosition, node, it.cur);
+
+               if (entry->local_end > flushed_upto)
+               {
+                       /*
+                        * Not flushed yet.  Rather than walking the rest of a
+                        * possibly long list, take the write position from the tail.
+                        */
+                       FlushPosition *last =
+                               dlist_tail_element(FlushPosition, node, &lsn_mapping);
+
+                       *write = last->remote_end;
+                       *have_pending_txes = true;
+                       return;
+               }
+
+               /* Flushed locally: report it and drop the entry. */
+               *write = entry->remote_end;
+               *flush = entry->remote_end;
+               dlist_delete(it.cur);
+               pfree(entry);
+       }
+
+       *have_pending_txes = !dlist_is_empty(&lsn_mapping);
+}
+
+/*
+ * Append the local/remote LSN pair of the commit just applied to the
+ * tracking list.
+ *
+ * On return the current memory context is ApplyContext.
+ */
+static void
+store_flush_position(XLogRecPtr remote_lsn)
+{
+       FlushPosition *entry;
+
+       /* The list entry has to live in the permanent context. */
+       MemoryContextSwitchTo(ApplyCacheContext);
+       entry = (FlushPosition *) palloc(sizeof(FlushPosition));
+
+       /* Pair the local commit end with the remote position. */
+       entry->remote_end = remote_lsn;
+       entry->local_end = XactLastCommitEnd;
+       dlist_push_tail(&lsn_mapping, &entry->node);
+
+       MemoryContextSwitchTo(ApplyContext);
+}
+
+
+/*
+ * Record receive statistics in this worker's MyLogicalRepWorker entry.
+ *
+ * The receive time is always set to the current time.  When "reply" is
+ * true, the reply LSN and reply time are updated as well.
+ */
+static void
+UpdateWorkerStats(XLogRecPtr last_lsn, TimestampTz send_time, bool reply)
+{
+       MyLogicalRepWorker->last_recv_time = GetCurrentTimestamp();
+       MyLogicalRepWorker->last_send_time = send_time;
+       MyLogicalRepWorker->last_lsn = last_lsn;
+
+       if (!reply)
+               return;
+
+       MyLogicalRepWorker->reply_time = send_time;
+       MyLogicalRepWorker->reply_lsn = last_lsn;
+}
+
+/*
+ * Apply main loop.
+ *
+ * Creates ApplyContext and then, until got_SIGTERM is set or the data
+ * stream ends (walrcv_receive() returns a negative length), repeatedly:
+ *
+ *  - processes messages until walrcv_receive() returns 0, handing 'w'
+ *    messages to apply_dispatch() and answering 'k' (keepalive) messages
+ *    through send_feedback();
+ *  - when not inside a remote transaction, runs an empty transaction and
+ *    rereads the subscription if it has been marked invalid;
+ *  - reports the last received position, resets ApplyContext and waits on
+ *    the process latch and the connection socket for up to
+ *    NAPTIME_PER_CYCLE.
+ *
+ * last_recv_timestamp and ping_sent track when we last heard from the
+ * publisher.  They must survive across iterations of the outer loop and are
+ * only reset when a message is actually received; otherwise the
+ * wal_receiver_timeout checks below could never trigger.
+ */
+static void
+ApplyLoop(void)
+{
+       XLogRecPtr      last_received = InvalidXLogRecPtr;
+       TimestampTz last_recv_timestamp = GetCurrentTimestamp();
+       bool            ping_sent = false;
+
+       /* Init the ApplyContext which we use for easier cleanup. */
+       ApplyContext = AllocSetContextCreate(TopMemoryContext,
+                                                                                "ApplyContext",
+                                                                                ALLOCSET_DEFAULT_MINSIZE,
+                                                                                ALLOCSET_DEFAULT_INITSIZE,
+                                                                                ALLOCSET_DEFAULT_MAXSIZE);
+
+       /* mark as idle, before starting to loop */
+       pgstat_report_activity(STATE_IDLE, NULL);
+
+       while (!got_SIGTERM)
+       {
+               pgsocket        fd = PGINVALID_SOCKET;
+               int                     rc;
+               int                     len;
+               char       *buf = NULL;
+               bool            endofstream = false;
+
+               MemoryContextSwitchTo(ApplyContext);
+
+               len = walrcv_receive(wrconn, &buf, &fd);
+
+               if (len != 0)
+               {
+                       /* Process the data */
+                       for (;;)
+                       {
+                               CHECK_FOR_INTERRUPTS();
+
+                               if (len == 0)
+                               {
+                                       /* Nothing more to process right now. */
+                                       break;
+                               }
+                               else if (len < 0)
+                               {
+                                       ereport(LOG,
+                                                       (errmsg("data stream from publisher has ended")));
+                                       endofstream = true;
+                                       break;
+                               }
+                               else
+                               {
+                                       int c;
+                                       StringInfoData s;
+
+                                       /* We heard from the publisher: reset timeout tracking. */
+                                       last_recv_timestamp = GetCurrentTimestamp();
+                                       ping_sent = false;
+
+                                       /* Ensure we are reading the data into our memory context. */
+                                       MemoryContextSwitchTo(ApplyContext);
+
+                                       /* Wrap the received buffer so the pq_getmsg* readers work. */
+                                       s.data = buf;
+                                       s.len = len;
+                                       s.cursor = 0;
+                                       s.maxlen = -1;
+
+                                       c = pq_getmsgbyte(&s);
+
+                                       if (c == 'w')
+                                       {
+                                               /* Header: start LSN, end LSN, send time; then payload. */
+                                               XLogRecPtr      start_lsn;
+                                               XLogRecPtr      end_lsn;
+                                               TimestampTz     send_time;
+
+                                               start_lsn = pq_getmsgint64(&s);
+                                               end_lsn = pq_getmsgint64(&s);
+                                               send_time =
+                                                       IntegerTimestampToTimestampTz(pq_getmsgint64(&s));
+
+                                               /* last_received only ever moves forward. */
+                                               if (last_received < start_lsn)
+                                                       last_received = start_lsn;
+
+                                               if (last_received < end_lsn)
+                                                       last_received = end_lsn;
+
+                                               UpdateWorkerStats(last_received, send_time, false);
+
+                                               apply_dispatch(&s);
+                                       }
+                                       else if (c == 'k')
+                                       {
+                                               /* Keepalive: end position, timestamp, reply-requested flag. */
+                                               XLogRecPtr endpos;
+                                               TimestampTz     timestamp;
+                                               bool reply_requested;
+
+                                               endpos = pq_getmsgint64(&s);
+                                               timestamp =
+                                                       IntegerTimestampToTimestampTz(pq_getmsgint64(&s));
+                                               reply_requested = pq_getmsgbyte(&s);
+
+                                               send_feedback(endpos, reply_requested, false);
+                                               UpdateWorkerStats(last_received, timestamp, true);
+                                       }
+                                       /* other message types are purposefully ignored */
+                               }
+
+                               len = walrcv_receive(wrconn, &buf, &fd);
+                       }
+               }
+
+               if (!in_remote_transaction)
+               {
+                       /*
+                        * If we didn't get any transactions for a while there might be
+                        * unconsumed invalidation messages in the queue, consume them now.
+                        */
+                       StartTransactionCommand();
+                       /* Check for subscription change */
+                       if (!MySubscriptionValid)
+                               reread_subscription();
+                       CommitTransactionCommand();
+               }
+
+               /* confirm all writes at once */
+               send_feedback(last_received, false, false);
+
+               /* Cleanup the memory. */
+               MemoryContextResetAndDeleteChildren(ApplyContext);
+               MemoryContextSwitchTo(TopMemoryContext);
+
+               /* Check if we need to exit the streaming loop. */
+               if (endofstream)
+                       break;
+
+               /*
+                * Wait for more data or latch.
+                */
+               rc = WaitLatchOrSocket(&MyProc->procLatch,
+                                                          WL_SOCKET_READABLE | WL_LATCH_SET |
+                                                          WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                                                          fd, NAPTIME_PER_CYCLE,
+                                                          WAIT_EVENT_LOGICAL_APPLY_MAIN);
+
+               /* Emergency bailout if postmaster has died */
+               if (rc & WL_POSTMASTER_DEATH)
+                       proc_exit(1);
+
+               if (rc & WL_TIMEOUT)
+               {
+                       /*
+                        * We didn't receive anything new. If we haven't heard
+                        * anything from the publisher for more than
+                        * wal_receiver_timeout / 2, ping it. In any case call
+                        * send_feedback() so that it gets a chance to send a
+                        * status update reporting our progress.
+                        */
+                       bool            requestReply = false;
+
+                       /*
+                        * Check if time since last receive from the publisher has
+                        * reached the configured limit.
+                        */
+                       if (wal_receiver_timeout > 0)
+                       {
+                               TimestampTz now = GetCurrentTimestamp();
+                               TimestampTz timeout;
+
+                               timeout =
+                                       TimestampTzPlusMilliseconds(last_recv_timestamp,
+                                                                                               wal_receiver_timeout);
+
+                               if (now >= timeout)
+                                       ereport(ERROR,
+                                                       (errmsg("terminating logical replication worker due to timeout")));
+
+                               /*
+                                * We didn't receive anything new, for half of
+                                * receiver replication timeout. Ping the publisher,
+                                * but only once until we hear from it again.
+                                */
+                               if (!ping_sent)
+                               {
+                                       timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
+                                                                                                                 (wal_receiver_timeout / 2));
+                                       if (now >= timeout)
+                                       {
+                                               requestReply = true;
+                                               ping_sent = true;
+                                       }
+                               }
+                       }
+
+                       send_feedback(last_received, requestReply, requestReply);
+               }
+
+               ResetLatch(&MyProc->procLatch);
+       }
+}
+
+/*
+ * Send a Standby Status Update message to server.
+ *
+ * 'recvpos' is the latest LSN we've received data to, force is set if we need
+ * to send a response to avoid timeouts.
+ */
+static void
+send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
+{
+       static StringInfo       reply_message = NULL;
+       static TimestampTz      send_time = 0;
+
+       static XLogRecPtr last_recvpos = InvalidXLogRecPtr;
+       static XLogRecPtr last_writepos = InvalidXLogRecPtr;
+       static XLogRecPtr last_flushpos = InvalidXLogRecPtr;
+
+       XLogRecPtr writepos;
+       XLogRecPtr flushpos;
+       TimestampTz now;
+       bool have_pending_txes;
+
+       /*
+        * If the user doesn't want status to be reported to the publisher, be
+        * sure to exit before doing anything at all.
+        */
+       if (!force && wal_receiver_status_interval <= 0)
+     &n