Improve performance of binary COPY FROM through better buffering.
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 25 Jul 2020 20:34:35 +0000 (16:34 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 25 Jul 2020 20:34:35 +0000 (16:34 -0400)
At least on Linux and macOS, fread() turns out to have far higher
per-call overhead than one could wish.  Reading 64KB of data at a time
and then parceling it out with our own memcpy logic makes binary COPY
from a file significantly faster --- around 30% in simple testing for
cases with narrow text columns (on Linux ... even more on macOS).

In binary COPY from frontend, there's no per-call fread(), and this
patch introduces an extra layer of memcpy'ing, but it still manages
to eke out a small win.  Apparently, the control-logic overhead in
CopyGetData() is enough to be worth avoiding for small fetches.

Bharath Rupireddy and Amit Langote, reviewed by Vignesh C,
cosmetic tweaks by me

Discussion: https://postgr.es/m/CALj2ACU5Bz06HWLwqSzNMN=Gupoj6Rcn_QVC+k070V4em9wu=A@mail.gmail.com

src/backend/commands/copy.c

index 44da71c4cb5cad1027086e3a59d3420d25e78cad..db7d24a511e3b468d514b7a5bcc9ec21d13596a5 100644 (file)
@@ -187,15 +187,15 @@ typedef struct CopyStateData
        TransitionCaptureState *transition_capture;
 
        /*
-        * These variables are used to reduce overhead in textual COPY FROM.
+        * These variables are used to reduce overhead in COPY FROM.
         *
         * attribute_buf holds the separated, de-escaped text for each field of
         * the current line.  The CopyReadAttributes functions return arrays of
         * pointers into this buffer.  We avoid palloc/pfree overhead by re-using
         * the buffer on each cycle.
         *
-        * (In binary COPY FROM, attribute_buf holds the binary data for the
-        * current field, while the other variables are not used.)
+        * In binary COPY FROM, attribute_buf holds the binary data for the
+        * current field, but the usage is otherwise similar.
         */
        StringInfoData attribute_buf;
 
@@ -209,7 +209,8 @@ typedef struct CopyStateData
         * input cycle is first to read the whole line into line_buf, convert it
         * to server encoding there, and then extract the individual attribute
         * fields into attribute_buf.  line_buf is preserved unmodified so that we
-        * can display it in error messages if appropriate.
+        * can display it in error messages if appropriate.  (In binary mode,
+        * line_buf is not used.)
         */
        StringInfoData line_buf;
        bool            line_buf_converted; /* converted to server encoding? */
@@ -217,15 +218,18 @@ typedef struct CopyStateData
 
        /*
         * Finally, raw_buf holds raw data read from the data source (file or
-        * client connection).  CopyReadLine parses this data sufficiently to
-        * locate line boundaries, then transfers the data to line_buf and
-        * converts it.  Note: we guarantee that there is a \0 at
-        * raw_buf[raw_buf_len].
+        * client connection).  In text mode, CopyReadLine parses this data
+        * sufficiently to locate line boundaries, then transfers the data to
+        * line_buf and converts it.  In binary mode, CopyReadBinaryData fetches
+        * appropriate amounts of data from this buffer.  In both modes, we
+        * guarantee that there is a \0 at raw_buf[raw_buf_len].
         */
 #define RAW_BUF_SIZE 65536             /* we palloc RAW_BUF_SIZE+1 bytes */
        char       *raw_buf;
        int                     raw_buf_index;  /* next byte to process */
        int                     raw_buf_len;    /* total # of bytes stored */
+       /* Shorthand for number of unconsumed bytes available in raw_buf */
+#define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
 } CopyStateData;
 
 /* DestReceiver for COPY (query) TO */
@@ -394,6 +398,8 @@ static void CopySendInt32(CopyState cstate, int32 val);
 static bool CopyGetInt32(CopyState cstate, int32 *val);
 static void CopySendInt16(CopyState cstate, int16 val);
 static bool CopyGetInt16(CopyState cstate, int16 *val);
+static bool CopyLoadRawBuf(CopyState cstate);
+static int     CopyReadBinaryData(CopyState cstate, char *dest, int nbytes);
 
 
 /*
@@ -723,7 +729,7 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
 /*
  * CopySendInt32 sends an int32 in network byte order
  */
-static void
+static inline void
 CopySendInt32(CopyState cstate, int32 val)
 {
        uint32          buf;
@@ -737,12 +743,12 @@ CopySendInt32(CopyState cstate, int32 val)
  *
  * Returns true if OK, false if EOF
  */
-static bool
+static inline bool
 CopyGetInt32(CopyState cstate, int32 *val)
 {
        uint32          buf;
 
-       if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
+       if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
        {
                *val = 0;                               /* suppress compiler warning */
                return false;
@@ -754,7 +760,7 @@ CopyGetInt32(CopyState cstate, int32 *val)
 /*
  * CopySendInt16 sends an int16 in network byte order
  */
-static void
+static inline void
 CopySendInt16(CopyState cstate, int16 val)
 {
        uint16          buf;
@@ -766,12 +772,12 @@ CopySendInt16(CopyState cstate, int16 val)
 /*
  * CopyGetInt16 reads an int16 that appears in network byte order
  */
-static bool
+static inline bool
 CopyGetInt16(CopyState cstate, int16 *val)
 {
        uint16          buf;
 
-       if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
+       if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
        {
                *val = 0;                               /* suppress compiler warning */
                return false;
@@ -786,26 +792,20 @@ CopyGetInt16(CopyState cstate, int16 *val)
  *
  * Returns true if able to obtain at least one more byte, else false.
  *
- * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
- * down to the start of the buffer and then we load more data after that.
- * This case is used only when a frontend multibyte character crosses a
- * bufferload boundary.
+ * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
+ * of the buffer and then we load more data after that.  This case occurs only
+ * when a multibyte character crosses a bufferload boundary.
  */
 static bool
 CopyLoadRawBuf(CopyState cstate)
 {
-       int                     nbytes;
+       int                     nbytes = RAW_BUF_BYTES(cstate);
        int                     inbytes;
 
-       if (cstate->raw_buf_index < cstate->raw_buf_len)
-       {
-               /* Copy down the unprocessed data */
-               nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
+       /* Copy down the unprocessed data if any. */
+       if (nbytes > 0)
                memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
                                nbytes);
-       }
-       else
-               nbytes = 0;                             /* no data need be saved */
 
        inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
                                                  1, RAW_BUF_SIZE - nbytes);
@@ -816,6 +816,54 @@ CopyLoadRawBuf(CopyState cstate)
        return (inbytes > 0);
 }
 
+/*
+ * CopyReadBinaryData
+ *
+ * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
+ * and writes them to 'dest'.  Returns the number of bytes read (which
+ * would be less than 'nbytes' only if we reach EOF).
+ */
+static int
+CopyReadBinaryData(CopyState cstate, char *dest, int nbytes)
+{
+       int                     copied_bytes = 0;
+
+       if (RAW_BUF_BYTES(cstate) >= nbytes)
+       {
+               /* Enough bytes are present in the buffer. */
+               memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
+               cstate->raw_buf_index += nbytes;
+               copied_bytes = nbytes;
+       }
+       else
+       {
+               /*
+                * Not enough bytes in the buffer, so must read from the file.  Need
+                * to loop since 'nbytes' could be larger than the buffer size.
+                */
+               do
+               {
+                       int                     copy_bytes;
+
+                       /* Load more data if buffer is empty. */
+                       if (RAW_BUF_BYTES(cstate) == 0)
+                       {
+                               if (!CopyLoadRawBuf(cstate))
+                                       break;          /* EOF */
+                       }
+
+                       /* Transfer some bytes. */
+                       copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
+                       memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
+                       cstate->raw_buf_index += copy_bytes;
+                       dest += copy_bytes;
+                       copied_bytes += copy_bytes;
+               } while (copied_bytes < nbytes);
+       }
+
+       return copied_bytes;
+}
+
 
 /*
  *      DoCopy executes the SQL COPY statement
@@ -3366,17 +3414,17 @@ BeginCopyFrom(ParseState *pstate,
        cstate->cur_attval = NULL;
 
        /*
-        * Set up variables to avoid per-attribute overhead.  attribute_buf is
-        * used in both text and binary modes, but we use line_buf and raw_buf
+        * Set up variables to avoid per-attribute overhead.  attribute_buf and
+        * raw_buf are used in both text and binary modes, but we use line_buf
         * only in text mode.
         */
        initStringInfo(&cstate->attribute_buf);
+       cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
+       cstate->raw_buf_index = cstate->raw_buf_len = 0;
        if (!cstate->binary)
        {
                initStringInfo(&cstate->line_buf);
                cstate->line_buf_converted = false;
-               cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
-               cstate->raw_buf_index = cstate->raw_buf_len = 0;
        }
 
        /* Assign range table, we'll need it in CopyFrom. */
@@ -3527,7 +3575,7 @@ BeginCopyFrom(ParseState *pstate,
                int32           tmp;
 
                /* Signature */
-               if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
+               if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
                        memcmp(readSig, BinarySignature, 11) != 0)
                        ereport(ERROR,
                                        (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
@@ -3555,7 +3603,7 @@ BeginCopyFrom(ParseState *pstate,
                /* Skip extension header, if present */
                while (tmp-- > 0)
                {
-                       if (CopyGetData(cstate, readSig, 1, 1) != 1)
+                       if (CopyReadBinaryData(cstate, readSig, 1) != 1)
                                ereport(ERROR,
                                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                 errmsg("invalid COPY file header (wrong length)")));
@@ -3771,7 +3819,7 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
                        char            dummy;
 
                        if (cstate->copy_dest != COPY_OLD_FE &&
-                               CopyGetData(cstate, &dummy, 1, 1) > 0)
+                               CopyReadBinaryData(cstate, &dummy, 1) > 0)
                                ereport(ERROR,
                                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                 errmsg("received copy data after EOF marker")));
@@ -4744,8 +4792,8 @@ CopyReadBinaryAttribute(CopyState cstate, FmgrInfo *flinfo,
        resetStringInfo(&cstate->attribute_buf);
 
        enlargeStringInfo(&cstate->attribute_buf, fld_size);
-       if (CopyGetData(cstate, cstate->attribute_buf.data,
-                                       fld_size, fld_size) != fld_size)
+       if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
+                                                  fld_size) != fld_size)
                ereport(ERROR,
                                (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                 errmsg("unexpected EOF in COPY data")));