diff options
| author | Peter Eisentraut | 2022-03-30 06:56:58 +0000 |
|---|---|---|
| committer | Peter Eisentraut | 2022-03-30 07:02:31 +0000 |
| commit | 072132f04e55c1c3b0f1a582318da78de7334379 (patch) | |
| tree | d855c0b1716968cd26966e34f41f77de0c0d0af6 /src/backend/commands | |
| parent | edcedcc2c7bb8390858bbccda9637318598f2473 (diff) | |
Add header matching mode to COPY FROM
COPY FROM supports the HEADER option to silently discard the header
line from a CSV or text file. It is possible to mistakenly load a
file that still matches the expected format (for example, one in which
two text columns have been swapped), resulting in garbage in the database.
This commit adds a new option value, HEADER MATCH, that checks the column
names in the header line against the names of the target table's columns
being loaded and raises an error if they do not match.
Author: Rémi Lapeyre <remi.lapeyre@lenstra.fr>
Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com>
Discussion: https://www.postgresql.org/message-id/flat/CAF1-J-0PtCWMeLtswwGV2M70U26n4g33gpe1rcKQqe6wVQDrFA@mail.gmail.com
Diffstat (limited to 'src/backend/commands')
| -rw-r--r-- | src/backend/commands/copy.c | 60 | ||||
| -rw-r--r-- | src/backend/commands/copyfromparse.c | 53 |
2 files changed, 109 insertions, 4 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 7a0c897cc97..689713ea580 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -314,6 +314,64 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, } /* + * Extract a CopyHeaderChoice value from a DefElem. This is like + * defGetBoolean() but also accepts the special value "match". + */ +static CopyHeaderChoice +defGetCopyHeaderChoice(DefElem *def) +{ + /* + * If no parameter given, assume "true" is meant. + */ + if (def->arg == NULL) + return COPY_HEADER_TRUE; + + /* + * Allow 0, 1, "true", "false", "on", "off", or "match". + */ + switch (nodeTag(def->arg)) + { + case T_Integer: + switch (intVal(def->arg)) + { + case 0: + return COPY_HEADER_FALSE; + case 1: + return COPY_HEADER_TRUE; + default: + /* otherwise, error out below */ + break; + } + break; + default: + { + char *sval = defGetString(def); + + /* + * The set of strings accepted here should match up with the + * grammar's opt_boolean_or_string production. + */ + if (pg_strcasecmp(sval, "true") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(sval, "false") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(sval, "on") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(sval, "off") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(sval, "match") == 0) + return COPY_HEADER_MATCH; + } + break; + } + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a Boolean value or \"match\"", + def->defname))); + return COPY_HEADER_FALSE; /* keep compiler quiet */ +} + +/* * Process the statement option list for COPY. 
* * Scan the options list (a list of DefElem) and transpose the information @@ -394,7 +452,7 @@ ProcessCopyOptions(ParseState *pstate, if (header_specified) errorConflictingDefElem(defel, pstate); header_specified = true; - opts_out->header_line = defGetBoolean(defel); + opts_out->header_line = defGetCopyHeaderChoice(defel); } else if (strcmp(defel->defname, "quote") == 0) { diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index baf328b620c..58017ec53b0 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -72,6 +72,7 @@ #include "miscadmin.h" #include "pgstat.h" #include "port/pg_bswap.h" +#include "utils/builtins.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -758,12 +759,58 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields) /* only available for text or csv input */ Assert(!cstate->opts.binary); - /* on input just throw the header line away */ + /* on input check that the header line is correct if needed */ if (cstate->cur_lineno == 0 && cstate->opts.header_line) { + ListCell *cur; + TupleDesc tupDesc; + + tupDesc = RelationGetDescr(cstate->rel); + cstate->cur_lineno++; - if (CopyReadLine(cstate)) - return false; /* done */ + done = CopyReadLine(cstate); + + if (cstate->opts.header_line == COPY_HEADER_MATCH) + { + int fldnum; + + if (cstate->opts.csv_mode) + fldct = CopyReadAttributesCSV(cstate); + else + fldct = CopyReadAttributesText(cstate); + + if (fldct != list_length(cstate->attnumlist)) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("wrong number of fields in header line: field count is %d, expected %d", + fldct, list_length(cstate->attnumlist)))); + + fldnum = 0; + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + char *colName = cstate->raw_fields[attnum - 1]; + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + fldnum++; + + if (colName == NULL) + ereport(ERROR, + 
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"", + fldnum, cstate->opts.null_print, NameStr(attr->attname)))); + + if (namestrcmp(&attr->attname, colName) != 0) { + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"", + fldnum, colName, NameStr(attr->attname)))); + } + } + } + + if (done) + return false; } cstate->cur_lineno++; |
