Refactor COPY TO to use format callback functions.
author: Masahiko Sawada <msawada@postgresql.org>
Thu, 27 Feb 2025 23:03:52 +0000 (15:03 -0800)
committer: Masahiko Sawada <msawada@postgresql.org>
Thu, 27 Feb 2025 23:03:52 +0000 (15:03 -0800)
This commit introduces a new CopyToRoutine struct, which is a set of
callback routines to copy tuples in a specific format. It also makes
the existing formats (text, CSV, and binary) utilize these format
callbacks.

This change is a preliminary step towards making the COPY TO command
extensible in terms of output formats.

Additionally, this refactoring contributes to a performance
improvement by reducing the number of "if" branches that need to be
checked on a per-row basis when sending field representations in text
or CSV mode. The performance benchmark results showed ~5% performance
gain in text or CSV mode.

Author: Sutou Kouhei <kou@clear-code.com>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Tomas Vondra <tomas.vondra@enterprisedb.com>
Reviewed-by: Junwang Zhao <zhjwpku@gmail.com>
Discussion: https://postgr.es/m/20231204.153548.2126325458835528809.kou@clear-code.com

src/backend/commands/copyto.c
src/include/commands/copyapi.h [new file with mode: 0644]
src/tools/pgindent/typedefs.list

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 091fbc12cc512a752e3f8575451d66526f00a492..721d29f8e53acbd4dab2542724dc2292467bfbfe 100644 (file)
@@ -19,7 +19,7 @@
 #include <sys/stat.h>
 
 #include "access/tableam.h"
-#include "commands/copy.h"
+#include "commands/copyapi.h"
 #include "commands/progress.h"
 #include "executor/execdesc.h"
 #include "executor/executor.h"
@@ -64,6 +64,9 @@ typedef enum CopyDest
  */
 typedef struct CopyToStateData
 {
+   /* format-specific routines */
+   const CopyToRoutine *routine;
+
    /* low-level state data */
    CopyDest    copy_dest;      /* type of copy source/destination */
    FILE       *copy_file;      /* used if copy_dest == COPY_FILE */
@@ -114,6 +117,19 @@ static void CopyAttributeOutText(CopyToState cstate, const char *string);
 static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
                                bool use_quote);
 
+/* built-in format-specific routines */
+static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
+static void CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
+static void CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot);
+static void CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot);
+static void CopyToTextLikeOneRow(CopyToState cstate, TupleTableSlot *slot,
+                                bool is_csv);
+static void CopyToTextLikeEnd(CopyToState cstate);
+static void CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc);
+static void CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
+static void CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot);
+static void CopyToBinaryEnd(CopyToState cstate);
+
 /* Low-level communications functions */
 static void SendCopyBegin(CopyToState cstate);
 static void SendCopyEnd(CopyToState cstate);
@@ -121,9 +137,251 @@ static void CopySendData(CopyToState cstate, const void *databuf, int datasize);
 static void CopySendString(CopyToState cstate, const char *str);
 static void CopySendChar(CopyToState cstate, char c);
 static void CopySendEndOfRow(CopyToState cstate);
+static void CopySendTextLikeEndOfRow(CopyToState cstate);
 static void CopySendInt32(CopyToState cstate, int32 val);
 static void CopySendInt16(CopyToState cstate, int16 val);
 
+/*
+ * COPY TO routines for built-in formats.
+ *
+ * CSV and text formats share the same TextLike routines except for the
+ * one-row callback.
+ */
+
+/* text format */
+static const CopyToRoutine CopyToRoutineText = {
+   .CopyToStart = CopyToTextLikeStart,
+   .CopyToOutFunc = CopyToTextLikeOutFunc,
+   .CopyToOneRow = CopyToTextOneRow,
+   .CopyToEnd = CopyToTextLikeEnd,
+};
+
+/* CSV format */
+static const CopyToRoutine CopyToRoutineCSV = {
+   .CopyToStart = CopyToTextLikeStart,
+   .CopyToOutFunc = CopyToTextLikeOutFunc,
+   .CopyToOneRow = CopyToCSVOneRow,
+   .CopyToEnd = CopyToTextLikeEnd,
+};
+
+/* binary format */
+static const CopyToRoutine CopyToRoutineBinary = {
+   .CopyToStart = CopyToBinaryStart,
+   .CopyToOutFunc = CopyToBinaryOutFunc,
+   .CopyToOneRow = CopyToBinaryOneRow,
+   .CopyToEnd = CopyToBinaryEnd,
+};
+
+/* Return a COPY TO routine for the given options */
+static const CopyToRoutine *
+CopyToGetRoutine(CopyFormatOptions opts)
+{
+   if (opts.csv_mode)
+       return &CopyToRoutineCSV;
+   else if (opts.binary)
+       return &CopyToRoutineBinary;
+
+   /* default is text */
+   return &CopyToRoutineText;
+}
+
+/* Implementation of the start callback for text and CSV formats */
+static void
+CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
+{
+   /*
+    * For non-binary copy, we need to convert null_print to file encoding,
+    * because it will be sent directly with CopySendString.
+    */
+   if (cstate->need_transcoding)
+       cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
+                                                         cstate->opts.null_print_len,
+                                                         cstate->file_encoding);
+
+   /* if a header has been requested send the line */
+   if (cstate->opts.header_line)
+   {
+       ListCell   *cur;
+       bool        hdr_delim = false;
+
+       foreach(cur, cstate->attnumlist)
+       {
+           int         attnum = lfirst_int(cur);
+           char       *colname;
+
+           if (hdr_delim)
+               CopySendChar(cstate, cstate->opts.delim[0]);
+           hdr_delim = true;
+
+           colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
+
+           if (cstate->opts.csv_mode)
+               CopyAttributeOutCSV(cstate, colname, false);
+           else
+               CopyAttributeOutText(cstate, colname);
+       }
+
+       CopySendTextLikeEndOfRow(cstate);
+   }
+}
+
+/*
+ * Implementation of the outfunc callback for text and CSV formats. Assign
+ * the output function data to the given *finfo.
+ */
+static void
+CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
+{
+   Oid         func_oid;
+   bool        is_varlena;
+
+   /* Set output function for an attribute */
+   getTypeOutputInfo(atttypid, &func_oid, &is_varlena);
+   fmgr_info(func_oid, finfo);
+}
+
+/* Implementation of the per-row callback for text format */
+static void
+CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot)
+{
+   CopyToTextLikeOneRow(cstate, slot, false);
+}
+
+/* Implementation of the per-row callback for CSV format */
+static void
+CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot)
+{
+   CopyToTextLikeOneRow(cstate, slot, true);
+}
+
+/*
+ * Workhorse for CopyToTextOneRow() and CopyToCSVOneRow().
+ *
+ * We use pg_attribute_always_inline to reduce function call overhead
+ * and to help compilers to optimize away the 'is_csv' condition.
+ */
+static pg_attribute_always_inline void
+CopyToTextLikeOneRow(CopyToState cstate,
+                    TupleTableSlot *slot,
+                    bool is_csv)
+{
+   bool        need_delim = false;
+   FmgrInfo   *out_functions = cstate->out_functions;
+
+   foreach_int(attnum, cstate->attnumlist)
+   {
+       Datum       value = slot->tts_values[attnum - 1];
+       bool        isnull = slot->tts_isnull[attnum - 1];
+
+       if (need_delim)
+           CopySendChar(cstate, cstate->opts.delim[0]);
+       need_delim = true;
+
+       if (isnull)
+       {
+           CopySendString(cstate, cstate->opts.null_print_client);
+       }
+       else
+       {
+           char       *string;
+
+           string = OutputFunctionCall(&out_functions[attnum - 1],
+                                       value);
+
+           if (is_csv)
+               CopyAttributeOutCSV(cstate, string,
+                                   cstate->opts.force_quote_flags[attnum - 1]);
+           else
+               CopyAttributeOutText(cstate, string);
+       }
+   }
+
+   CopySendTextLikeEndOfRow(cstate);
+}
+
+/* Implementation of the end callback for text and CSV formats */
+static void
+CopyToTextLikeEnd(CopyToState cstate)
+{
+   /* Nothing to do here */
+}
+
+/*
+ * Implementation of the start callback for binary format. Send a header
+ * for a binary copy.
+ */
+static void
+CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc)
+{
+   int32       tmp;
+
+   /* Signature */
+   CopySendData(cstate, BinarySignature, 11);
+   /* Flags field */
+   tmp = 0;
+   CopySendInt32(cstate, tmp);
+   /* No header extension */
+   tmp = 0;
+   CopySendInt32(cstate, tmp);
+}
+
+/*
+ * Implementation of the outfunc callback for binary format. Assign
+ * the binary output function to the given *finfo.
+ */
+static void
+CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
+{
+   Oid         func_oid;
+   bool        is_varlena;
+
+   /* Set output function for an attribute */
+   getTypeBinaryOutputInfo(atttypid, &func_oid, &is_varlena);
+   fmgr_info(func_oid, finfo);
+}
+
+/* Implementation of the per-row callback for binary format */
+static void
+CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot)
+{
+   FmgrInfo   *out_functions = cstate->out_functions;
+
+   /* Binary per-tuple header */
+   CopySendInt16(cstate, list_length(cstate->attnumlist));
+
+   foreach_int(attnum, cstate->attnumlist)
+   {
+       Datum       value = slot->tts_values[attnum - 1];
+       bool        isnull = slot->tts_isnull[attnum - 1];
+
+       if (isnull)
+       {
+           CopySendInt32(cstate, -1);
+       }
+       else
+       {
+           bytea      *outputbytes;
+
+           outputbytes = SendFunctionCall(&out_functions[attnum - 1],
+                                          value);
+           CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
+           CopySendData(cstate, VARDATA(outputbytes),
+                        VARSIZE(outputbytes) - VARHDRSZ);
+       }
+   }
+
+   CopySendEndOfRow(cstate);
+}
+
+/* Implementation of the end callback for binary format */
+static void
+CopyToBinaryEnd(CopyToState cstate)
+{
+   /* Generate trailer for a binary copy */
+   CopySendInt16(cstate, -1);
+   /* Need to flush out the trailer */
+   CopySendEndOfRow(cstate);
+}
 
 /*
  * Send copy start/stop messages for frontend copies.  These have changed
@@ -191,16 +449,6 @@ CopySendEndOfRow(CopyToState cstate)
    switch (cstate->copy_dest)
    {
        case COPY_FILE:
-           if (!cstate->opts.binary)
-           {
-               /* Default line termination depends on platform */
-#ifndef WIN32
-               CopySendChar(cstate, '\n');
-#else
-               CopySendString(cstate, "\r\n");
-#endif
-           }
-
            if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
                       cstate->copy_file) != 1 ||
                ferror(cstate->copy_file))
@@ -235,10 +483,6 @@ CopySendEndOfRow(CopyToState cstate)
            }
            break;
        case COPY_FRONTEND:
-           /* The FE/BE protocol uses \n as newline for all platforms */
-           if (!cstate->opts.binary)
-               CopySendChar(cstate, '\n');
-
            /* Dump the accumulated row as one CopyData message */
            (void) pq_putmessage(PqMsg_CopyData, fe_msgbuf->data, fe_msgbuf->len);
            break;
@@ -254,6 +498,35 @@ CopySendEndOfRow(CopyToState cstate)
    resetStringInfo(fe_msgbuf);
 }
 
+/*
+ * Wrapper around CopySendEndOfRow for text and CSV formats. Appends the line
+ * terminator (file and frontend destinations only), then finishes the row.
+ */
+static inline void
+CopySendTextLikeEndOfRow(CopyToState cstate)
+{
+   switch (cstate->copy_dest)
+   {
+       case COPY_FILE:
+           /* Default line termination depends on platform */
+#ifndef WIN32
+           CopySendChar(cstate, '\n');
+#else
+           CopySendString(cstate, "\r\n");
+#endif
+           break;
+       case COPY_FRONTEND:
+           /* The FE/BE protocol uses \n as newline for all platforms */
+           CopySendChar(cstate, '\n');
+           break;
+       default:
+           break;
+   }
+
+   /* Now take the actions related to the end of a row */
+   CopySendEndOfRow(cstate);
+}
+
 /*
  * These functions do apply some data conversion
  */
@@ -426,6 +699,9 @@ BeginCopyTo(ParseState *pstate,
    /* Extract options from the statement node tree */
    ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
 
+   /* Set format routine */
+   cstate->routine = CopyToGetRoutine(cstate->opts);
+
    /* Process the source/target relation or query */
    if (rel)
    {
@@ -772,19 +1048,10 @@ DoCopyTo(CopyToState cstate)
    foreach(cur, cstate->attnumlist)
    {
        int         attnum = lfirst_int(cur);
-       Oid         out_func_oid;
-       bool        isvarlena;
        Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
 
-       if (cstate->opts.binary)
-           getTypeBinaryOutputInfo(attr->atttypid,
-                                   &out_func_oid,
-                                   &isvarlena);
-       else
-           getTypeOutputInfo(attr->atttypid,
-                             &out_func_oid,
-                             &isvarlena);
-       fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
+       cstate->routine->CopyToOutFunc(cstate, attr->atttypid,
+                                      &cstate->out_functions[attnum - 1]);
    }
 
    /*
@@ -797,56 +1064,7 @@ DoCopyTo(CopyToState cstate)
                                               "COPY TO",
                                               ALLOCSET_DEFAULT_SIZES);
 
-   if (cstate->opts.binary)
-   {
-       /* Generate header for a binary copy */
-       int32       tmp;
-
-       /* Signature */
-       CopySendData(cstate, BinarySignature, 11);
-       /* Flags field */
-       tmp = 0;
-       CopySendInt32(cstate, tmp);
-       /* No header extension */
-       tmp = 0;
-       CopySendInt32(cstate, tmp);
-   }
-   else
-   {
-       /*
-        * For non-binary copy, we need to convert null_print to file
-        * encoding, because it will be sent directly with CopySendString.
-        */
-       if (cstate->need_transcoding)
-           cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
-                                                             cstate->opts.null_print_len,
-                                                             cstate->file_encoding);
-
-       /* if a header has been requested send the line */
-       if (cstate->opts.header_line)
-       {
-           bool        hdr_delim = false;
-
-           foreach(cur, cstate->attnumlist)
-           {
-               int         attnum = lfirst_int(cur);
-               char       *colname;
-
-               if (hdr_delim)
-                   CopySendChar(cstate, cstate->opts.delim[0]);
-               hdr_delim = true;
-
-               colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
-
-               if (cstate->opts.csv_mode)
-                   CopyAttributeOutCSV(cstate, colname, false);
-               else
-                   CopyAttributeOutText(cstate, colname);
-           }
-
-           CopySendEndOfRow(cstate);
-       }
-   }
+   cstate->routine->CopyToStart(cstate, tupDesc);
 
    if (cstate->rel)
    {
@@ -885,13 +1103,7 @@ DoCopyTo(CopyToState cstate)
        processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
    }
 
-   if (cstate->opts.binary)
-   {
-       /* Generate trailer for a binary copy */
-       CopySendInt16(cstate, -1);
-       /* Need to flush out the trailer */
-       CopySendEndOfRow(cstate);
-   }
+   cstate->routine->CopyToEnd(cstate);
 
    MemoryContextDelete(cstate->rowcontext);
 
@@ -904,74 +1116,18 @@ DoCopyTo(CopyToState cstate)
 /*
  * Emit one row during DoCopyTo().
  */
-static void
+static inline void
 CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
 {
-   FmgrInfo   *out_functions = cstate->out_functions;
    MemoryContext oldcontext;
 
    MemoryContextReset(cstate->rowcontext);
    oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
 
-   if (cstate->opts.binary)
-   {
-       /* Binary per-tuple header */
-       CopySendInt16(cstate, list_length(cstate->attnumlist));
-   }
-
    /* Make sure the tuple is fully deconstructed */
    slot_getallattrs(slot);
 
-   if (!cstate->opts.binary)
-   {
-       bool        need_delim = false;
-
-       foreach_int(attnum, cstate->attnumlist)
-       {
-           Datum       value = slot->tts_values[attnum - 1];
-           bool        isnull = slot->tts_isnull[attnum - 1];
-           char       *string;
-
-           if (need_delim)
-               CopySendChar(cstate, cstate->opts.delim[0]);
-           need_delim = true;
-
-           if (isnull)
-               CopySendString(cstate, cstate->opts.null_print_client);
-           else
-           {
-               string = OutputFunctionCall(&out_functions[attnum - 1],
-                                           value);
-               if (cstate->opts.csv_mode)
-                   CopyAttributeOutCSV(cstate, string,
-                                       cstate->opts.force_quote_flags[attnum - 1]);
-               else
-                   CopyAttributeOutText(cstate, string);
-           }
-       }
-   }
-   else
-   {
-       foreach_int(attnum, cstate->attnumlist)
-       {
-           Datum       value = slot->tts_values[attnum - 1];
-           bool        isnull = slot->tts_isnull[attnum - 1];
-           bytea      *outputbytes;
-
-           if (isnull)
-               CopySendInt32(cstate, -1);
-           else
-           {
-               outputbytes = SendFunctionCall(&out_functions[attnum - 1],
-                                              value);
-               CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
-               CopySendData(cstate, VARDATA(outputbytes),
-                            VARSIZE(outputbytes) - VARHDRSZ);
-           }
-       }
-   }
-
-   CopySendEndOfRow(cstate);
+   cstate->routine->CopyToOneRow(cstate, slot);
 
    MemoryContextSwitchTo(oldcontext);
 }
diff --git a/src/include/commands/copyapi.h b/src/include/commands/copyapi.h
new file mode 100644 (file)
index 0000000..bd2d386
--- /dev/null
@@ -0,0 +1,57 @@
+/*-------------------------------------------------------------------------
+ *
+ * copyapi.h
+ *   API for COPY TO handlers
+ *
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/commands/copyapi.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef COPYAPI_H
+#define COPYAPI_H
+
+#include "commands/copy.h"
+
+/*
+ * API structure for a COPY TO format implementation. Note this must be
+ * allocated in a server-lifetime manner, typically as a static const struct.
+ */
+typedef struct CopyToRoutine
+{
+   /*
+    * Set output function information. This callback is called once per output
+    * column at the beginning of COPY TO.
+    *
+    * 'finfo' can be optionally filled to provide the catalog information of
+    * the output function.
+    *
+    * 'atttypid' is the OID of data type used by the relation's attribute.
+    */
+   void        (*CopyToOutFunc) (CopyToState cstate, Oid atttypid,
+                                 FmgrInfo *finfo);
+
+   /*
+    * Start a COPY TO. This callback is called once at the beginning of COPY
+    * TO.
+    *
+    * 'tupDesc' is the tuple descriptor of the relation from where the data
+    * is read.
+    */
+   void        (*CopyToStart) (CopyToState cstate, TupleDesc tupDesc);
+
+   /*
+    * Write one row stored in 'slot' to the destination.
+    */
+   void        (*CopyToOneRow) (CopyToState cstate, TupleTableSlot *slot);
+
+   /*
+    * End a COPY TO. This callback is called once at the end of COPY TO.
+    */
+   void        (*CopyToEnd) (CopyToState cstate);
+} CopyToRoutine;
+
+#endif                         /* COPYAPI_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index cfbab589d613e73b26af006148538ca52463462f..fcb968e1ffe6d244260ffe37c83035fcafa31d68 100644 (file)
@@ -512,6 +512,7 @@ CopyMultiInsertInfo
 CopyOnErrorChoice
 CopySource
 CopyStmt
+CopyToRoutine
 CopyToState
 CopyToStateData
 Cost