From eec57115e4c866f26bdc8bcbe3e2e7be4c6d0450 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 14 Jul 2021 13:08:28 +0300 Subject: [PATCH] In psql \copy from, send data to server in larger chunks. Previously, we would send each line as a separate CopyData message. That's pretty wasteful if the table is narrow, as each CopyData message has 5 bytes of overhead. For efficiency, buffer up and pack 8 kB of input data into each CopyData message. The server also sends each line as a separate CopyData message in COPY TO STDOUT, and that's similarly wasteful. But that's documented in the FE/BE protocol description, so changing that would be a wire protocol break. Reviewed-by: Aleksander Alekseev Discussion: https://www.postgresql.org/message-id/40b2cec0-d0fb-3191-2ae1-9a3fe16a7e48%40iki.fi --- src/bin/psql/copy.c | 99 +++++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 43 deletions(-) diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c index e1fee8e099..64ab40c4f7 100644 --- a/src/bin/psql/copy.c +++ b/src/bin/psql/copy.c @@ -581,13 +581,21 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res) else { bool copydone = false; + int buflen; + bool at_line_begin = true; + /* + * In text mode, we have to read the input one line at a time, so that + * we can stop reading at the EOF marker (\.). We mustn't read beyond + * the EOF marker, because if the data was inlined in a SQL script, we + * would eat up the commands after the EOF marker. + */ + buflen = 0; while (!copydone) - { /* for each input line ... */ - bool firstload; - bool linedone; + { + char *fgresult; - if (showprompt) + if (at_line_begin && showprompt) { const char *prompt = get_prompt(PROMPT_COPY, NULL); @@ -595,63 +603,68 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res) fflush(stdout); } - firstload = true; - linedone = false; - - while (!linedone) - { /* for each bufferload in line ... 
*/ - int linelen; - char *fgresult; - - /* enable longjmp while waiting for input */ - sigint_interrupt_enabled = true; + /* enable longjmp while waiting for input */ + sigint_interrupt_enabled = true; - fgresult = fgets(buf, sizeof(buf), copystream); + fgresult = fgets(&buf[buflen], COPYBUFSIZ - buflen, copystream); - sigint_interrupt_enabled = false; + sigint_interrupt_enabled = false; - if (!fgresult) - { - copydone = true; - break; - } + if (!fgresult) + copydone = true; + else + { + int linelen; - linelen = strlen(buf); + linelen = strlen(fgresult); + buflen += linelen; /* current line is done? */ - if (linelen > 0 && buf[linelen - 1] == '\n') - linedone = true; - - /* check for EOF marker, but not on a partial line */ - if (firstload) + if (buf[buflen - 1] == '\n') { - /* - * This code erroneously assumes '\.' on a line alone - * inside a quoted CSV string terminates the \copy. - * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org - */ - if (strcmp(buf, "\\.\n") == 0 || - strcmp(buf, "\\.\r\n") == 0) + /* check for EOF marker, but not on a partial line */ + if (at_line_begin) { - copydone = true; - break; + /* + * This code erroneously assumes '\.' on a line alone + * inside a quoted CSV string terminates the \copy. + * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org + */ + if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) || + (linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0)) + { + copydone = true; + } } - firstload = false; + if (copystream == pset.cur_cmd_source) + { + pset.lineno++; + pset.stmt_lineno++; + } + at_line_begin = true; } + else + at_line_begin = false; + } - if (PQputCopyData(conn, buf, linelen) <= 0) + /* + * If the buffer is full, or we've reached the EOF, flush it. + * + * Make sure there's always space for four more bytes in the + * buffer, plus a NUL terminator. That way, an EOF marker is + * never split across two fgets() calls, which simplifies the logic. 
+ */ + if (buflen >= COPYBUFSIZ - 5 || (copydone && buflen > 0)) + { + if (PQputCopyData(conn, buf, buflen) <= 0) { OK = false; copydone = true; break; } - } - if (copystream == pset.cur_cmd_source) - { - pset.lineno++; - pset.stmt_lineno++; + buflen = 0; } } } -- 2.39.5