Complete TODO item:
authorBruce Momjian <bruce@momjian.us>
Mon, 19 Apr 2004 17:22:31 +0000 (17:22 +0000)
committerBruce Momjian <bruce@momjian.us>
Mon, 19 Apr 2004 17:22:31 +0000 (17:22 +0000)
        o -Allow dump/load of CSV format

This adds new keywords to COPY and \copy:

        CSV - enable CSV mode (comma separated variable)
        QUOTE - specify quote character
        ESCAPE - specify escape character
        FORCE - force quoting of specified column
        LITERAL - suppress null comparison for columns

Doc changes included.  Regression updates coming from Andrew.

doc/src/sgml/ref/copy.sgml
doc/src/sgml/ref/psql-ref.sgml
src/backend/commands/copy.c
src/backend/parser/gram.y
src/backend/parser/keywords.c
src/backend/tcop/fastpath.c
src/bin/psql/copy.c

index 1e12cbf35168e72264ba95d524f5726eb9b1b4ba..7d53d5d3aad2820da1cf211544d7ecba1aef8383 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.55 2003/12/13 23:59:07 neilc Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.56 2004/04/19 17:22:30 momjian Exp $
 PostgreSQL documentation
 -->
 
@@ -26,7 +26,10 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
           [ BINARY ] 
           [ OIDS ]
           [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
-          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
+          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
+          [ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] 
+                [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
+                [ LITERAL <replaceable class="parameter">column</replaceable> [, ...] ] ] ]
 
 COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] ) ]
     TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
@@ -34,7 +37,10 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
           [ BINARY ]
           [ OIDS ]
           [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
-          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
+          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
+          [ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] 
+                [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
+                [ FORCE <replaceable class="parameter">column</replaceable> [, ...] ] ] ]
 </synopsis>
  </refsynopsisdiv>
  
@@ -146,7 +152,8 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
     <listitem>
      <para>
       The single character that separates columns within each row
-      (line) of the file.  The default is a tab character.
+      (line) of the file.  The default is a tab character in text mode,
+      a comma in <literal>CSV</> mode.
      </para>
     </listitem>
    </varlistentry>
@@ -156,20 +163,86 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
     <listitem>
      <para>
       The string that represents a null value. The default is
-      <literal>\N</literal> (backslash-N). You might prefer an empty
-      string, for example.
+      <literal>\N</literal> (backslash-N) in text mode, and an empty
+      value with no quotes in <literal>CSV</> mode. You might prefer an
+      empty string even in text mode for cases where you don't want to
+      distinguish nulls from empty strings.
      </para>
 
      <note>
       <para>
-       On a <command>COPY FROM</command>, any data item that matches
+       When using <command>COPY FROM</command>, any data item that matches
        this string will be stored as a null value, so you should make
        sure that you use the same string as you used with
        <command>COPY TO</command>.
       </para>
      </note>
+
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>CSV</literal></term>
+    <listitem>
+     <para>
+      Enables Comma Separated Variable (<literal>CSV</>) mode (also called
+      Comma Separated Value).  It sets the default <literal>DELIMITER</> to 
+      comma, and <literal>QUOTE</> and <literal>ESCAPE</> values to 
+      double-quote.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">quote</replaceable></term>
+    <listitem>
+     <para>
+      Specifies the quotation character in <literal>CSV</> mode.
+      The default is double-quote.
+     </para>
     </listitem>
    </varlistentry>
+
+   <varlistentry>
+    <term><replaceable class="parameter">escape</replaceable></term>
+    <listitem>
+     <para>
+      Specifies the character that should appear before a <literal>QUOTE</>
+      data character value in <literal>CSV</> mode.  The default is the
+      <literal>QUOTE</> value (usually double-quote).
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>FORCE</></term>
+    <listitem>
+     <para>
+      In <literal>CSV</> <command>COPY TO</> mode, forces quoting
+      to be used for all non-<literal>NULL</> values in each specified 
+      column.  <literal>NULL</> output is never quoted.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term><literal>LITERAL</></term>
+    <listitem>
+     <para>
+      In <literal>CSV</> <command>COPY FROM</> mode, for each column specified,
+      do not do a <literal>null string</> comparison;  instead load the value 
+      literally.  <literal>QUOTE</> and <literal>ESCAPE</> processing are still
+      performed.
+     </para>
+     <para>
+      If the <literal>null string</> is <literal>''</> (the default 
+      in <literal>CSV</> mode), a missing input value (<literal>delimiter, 
+      delimiter</>) will load as a zero-length string.  <literal>Delimiter, quote, 
+      quote, delimiter</> is always treated as a zero-length string on input.
+     </para>
+    </listitem>
+   </varlistentry>
+
   </variablelist>
  </refsect1>
 
@@ -233,6 +306,17 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
     constraints on the destination table. However, it will not invoke rules.
    </para>
 
+   <para>
+    <command>COPY</command> input and output are affected by
+    <varname>DateStyle</varname>. For portability with other
+    <productname>PostgreSQL</productname> installations which might use
+    non-default <varname>DateStyle</varname> settings,
+    <varname>DateStyle</varname> should be set to <literal>ISO</> before
+    using <command>COPY</>. In <literal>CSV</> mode, use <literal>ISO</>
+    or a <varname>DateStyle</varname> setting appropriate for the
+    external application.
+   </para>
+
    <para>
     <command>COPY</command> stops operation at the first error. This
     should not lead to problems in the event of a <command>COPY
@@ -253,7 +337,8 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
 
    <para>
     When <command>COPY</command> is used without the <literal>BINARY</literal> option,
-    the data read or written is a text file with one line per table row.
+    the data read or written is a text file with one line per table row,
+    unless <literal>CSV</> mode is used.
     Columns in a row are separated by the delimiter character.
     The column values themselves are strings generated by the
     output function, or acceptable to the input function, of each
@@ -379,6 +464,63 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
    </para>
   </refsect2>
 
+  <refsect2>
+   <title>CSV Format</title>
+
+   <para>
+    This format is used for importing and exporting the Comma
+    Separated Variable (<literal>CSV</>) file format used by many other
+    programs, such as spreadsheets. Instead of the escaping used by
+    <productname>PostgreSQL</productname>'s standard text mode, it
+    produces and recognizes the common CSV escaping mechanism.
+   </para>
+
+   <para>
+    The values in each record are separated by the <literal>DELIMITER</>
+    character. If the value contains the delimiter character, the
+    <literal>QUOTE</> character, the <literal>NULL</> string, a carriage
+    return, or line feed character, then the whole value is prefixed and
+    suffixed by the <literal>QUOTE</> character, and any occurrence
+    within the value of a <literal>QUOTE</> character or the
+    <literal>ESCAPE</> character is preceded by the escape character.
+    You can also use <literal>FORCE</> to force quotes when outputting
+    non-<literal>NULL</> values in specific columns.
+   </para>
+
+   <para> 
+    In general, the <literal>CSV</> format has no way to distinguish a
+    <literal>NULL</> from an empty string.
+    <productname>PostgreSQL</productname>'s COPY handles this by
+    quoting. A <literal>NULL</> is output as the <literal>NULL</> string 
+    and is not quoted, while a data value matching the <literal>NULL</> string 
+    is quoted. Therefore, using the default settings, a <literal>NULL</> is
+    written as an unquoted empty string, while an empty string is
+    written with double quotes (<literal>""</>). Reading values follows 
+    similar rules. You can use <literal>LITERAL</> to prevent <literal>NULL</>
+    input comparisons for specific columns.
+   </para>
+
+   <note>
+    <para>
+     CSV mode will both recognize and produce CSV files with quoted
+     values containing embedded carriage returns and line feeds. Thus
+     the files are not strictly one line per table row like text-mode
+     files.
+    </para>
+   </note>
+
+   <note>
+    <para>
+     Many programs produce strange and occasionally perverse CSV files,
+     so the file format is more a convention than a standard. Thus you
+     might encounter some files that cannot be imported using this
+     mechanism, and <command>COPY</> might produce files that other
+     programs cannot process.
+    </para>
+   </note>
+    
+  </refsect2>
+
   <refsect2>
    <title>Binary Format</title>
 
index 206fe2719bb4c174bf29f99b0923e94d41495167..c41080e1bac63f907422917832ea0b55ca5e1ad0 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.110 2004/04/12 15:58:52 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.111 2004/04/19 17:22:30 momjian Exp $
 PostgreSQL documentation
 -->
 
@@ -711,6 +711,10 @@ testdb=>
             [ <literal>oids</literal> ] 
             [ <literal>delimiter [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
             [ <literal>null [as] </literal> '<replaceable class="parameter">string</replaceable>' ]</literal>
+            [ <literal>csv [ quote [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
+                           [ <literal>escape [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
+                           [ <literal>force</> <replaceable class="parameter">column_list</replaceable> ]
+                           [ <literal>literal</> <replaceable class="parameter">column_list</replaceable> ] ]
         </term>
 
         <listitem>
index 07c9ff4d531da52712f440a5f5127ec2203dbd9a..abb153616aa8801f9c8513a787221fe840eadf1d 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.220 2004/04/15 22:36:03 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.221 2004/04/19 17:22:30 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -70,7 +70,8 @@ typedef enum CopyDest
 typedef enum CopyReadResult
 {
    NORMAL_ATTR,
-   END_OF_LINE
+   END_OF_LINE,
+   UNTERMINATED_FIELD
 } CopyReadResult;
 
 /*
@@ -130,15 +131,22 @@ static bool line_buf_converted;
 
 /* non-export function prototypes */
 static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
-      char *delim, char *null_print);
+      char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
+      List *force_atts);
 static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
-        char *delim, char *null_print);
+        char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
+        List *literal_atts);
 static bool CopyReadLine(void);
 static char *CopyReadAttribute(const char *delim, const char *null_print,
                               CopyReadResult *result, bool *isnull);
+static char *CopyReadAttributeCSV(const char *delim, const char *null_print,
+                              char *quote, char *escape,
+                              CopyReadResult *result, bool *isnull);
 static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
                        Oid typelem, bool *isnull);
 static void CopyAttributeOut(char *string, char *delim);
+static void CopyAttributeOutCSV(char *string, char *delim, char *quote,
+                               char *escape, bool force_quote);
 static List *CopyGetAttnums(Relation rel, List *attnamelist);
 static void limit_printout_length(StringInfo buf);
 
@@ -682,8 +690,15 @@ DoCopy(const CopyStmt *stmt)
    List       *attnumlist;
    bool        binary = false;
    bool        oids = false;
+   bool        csv_mode = false;
    char       *delim = NULL;
+   char       *quote = NULL;
+   char       *escape = NULL;
    char       *null_print = NULL;
+   List       *force = NIL;
+   List       *literal = NIL;
+   List       *force_atts = NIL;
+   List       *literal_atts = NIL;
    Relation    rel;
    AclMode     required_access = (is_from ? ACL_INSERT : ACL_SELECT);
    AclResult   aclresult;
@@ -725,6 +740,46 @@ DoCopy(const CopyStmt *stmt)
                         errmsg("conflicting or redundant options")));
            null_print = strVal(defel->arg);
        }
+       else if (strcmp(defel->defname, "csv") == 0)
+       {
+           if (csv_mode)
+               ereport(ERROR,
+                       (errcode(ERRCODE_SYNTAX_ERROR),
+                        errmsg("conflicting or redundant options")));
+           csv_mode = intVal(defel->arg);
+       }
+       else if (strcmp(defel->defname, "quote") == 0)
+       {
+           if (quote)
+               ereport(ERROR,
+                       (errcode(ERRCODE_SYNTAX_ERROR),
+                        errmsg("conflicting or redundant options")));
+           quote = strVal(defel->arg);
+       }
+       else if (strcmp(defel->defname, "escape") == 0)
+       {
+           if (escape)
+               ereport(ERROR,
+                       (errcode(ERRCODE_SYNTAX_ERROR),
+                        errmsg("conflicting or redundant options")));
+           escape = strVal(defel->arg);
+       }
+       else if (strcmp(defel->defname, "force") == 0)
+       {
+           if (force)
+               ereport(ERROR,
+                       (errcode(ERRCODE_SYNTAX_ERROR),
+                        errmsg("conflicting or redundant options")));
+           force = (List *)defel->arg;
+       }
+       else if (strcmp(defel->defname, "literal") == 0)
+       {
+           if (literal)
+               ereport(ERROR,
+                       (errcode(ERRCODE_SYNTAX_ERROR),
+                        errmsg("conflicting or redundant options")));
+           literal = (List *)defel->arg;
+       }
        else
            elog(ERROR, "option \"%s\" not recognized",
                 defel->defname);
@@ -735,6 +790,11 @@ DoCopy(const CopyStmt *stmt)
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("cannot specify DELIMITER in BINARY mode")));
 
+   if (binary && csv_mode)
+       ereport(ERROR,
+               (errcode(ERRCODE_SYNTAX_ERROR),
+                errmsg("cannot specify CSV in BINARY mode")));
+
    if (binary && null_print)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
@@ -742,10 +802,92 @@ DoCopy(const CopyStmt *stmt)
 
    /* Set defaults */
    if (!delim)
-       delim = "\t";
-
+       delim = csv_mode ? "," : "\t";
+   
    if (!null_print)
-       null_print = "\\N";
+       null_print = csv_mode ? "" : "\\N";
+
+   if (csv_mode)
+   {
+       if (!quote)
+           quote = "\"";
+       if (!escape)
+           escape = quote;
+   }
+       
+   /*
+    * Only single-character delimiter strings are supported.
+    */
+   if (strlen(delim) != 1)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY delimiter must be a single character")));
+
+   /*
+    * Check quote
+    */
+   if (!csv_mode && quote != NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY quote available only in CSV mode")));
+
+   if (csv_mode && strlen(quote) != 1)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY quote must be a single character")));
+
+   /*
+    * Check escape
+    */
+   if (!csv_mode && escape != NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY escape available only in CSV mode")));
+
+   if (csv_mode && strlen(escape) != 1)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY escape must be a single character")));
+
+   /*
+    * Check force
+    */
+   if (!csv_mode && force != NIL)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY force available only in CSV mode")));
+   if (force != NIL && is_from)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY force only available using COPY TO")));
+
+   /*
+    * Check literal
+    */
+   if (!csv_mode && literal != NIL)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY literal available only in CSV mode")));
+   if (literal != NIL && !is_from)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY literal only available using COPY FROM")));
+
+   /*
+    * Don't allow the delimiter to appear in the null string.
+    */
+   if (strchr(null_print, delim[0]) != NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("COPY delimiter must not appear in the NULL specification")));
+
+   /*
+    * Don't allow the csv quote char to appear in the null string.
+    */
+   if (csv_mode && strchr(null_print, quote[0]) != NULL)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("CSV quote character must not appear in the NULL specification")));
 
    /*
     * Open and lock the relation, using the appropriate lock type.
@@ -771,22 +913,6 @@ DoCopy(const CopyStmt *stmt)
                 errhint("Anyone can COPY to stdout or from stdin. "
                       "psql's \\copy command also works for anyone.")));
 
-   /*
-    * Presently, only single-character delimiter strings are supported.
-    */
-   if (strlen(delim) != 1)
-       ereport(ERROR,
-               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("COPY delimiter must be a single character")));
-
-   /*
-    * Don't allow the delimiter to appear in the null string.
-    */
-   if (strchr(null_print, delim[0]) != NULL)
-       ereport(ERROR,
-               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("COPY delimiter must not appear in the NULL specification")));
-
    /*
     * Don't allow COPY w/ OIDs to or from a table without them
     */
@@ -801,6 +927,52 @@ DoCopy(const CopyStmt *stmt)
     */
    attnumlist = CopyGetAttnums(rel, attnamelist);
 
+   /*
+    * Check that FORCE references valid COPY columns
+    */
+   if (force)
+   {
+       TupleDesc   tupDesc = RelationGetDescr(rel);
+       Form_pg_attribute *attr = tupDesc->attrs;
+       List       *cur;
+
+       force_atts = CopyGetAttnums(rel, force);
+
+       foreach(cur, force_atts)
+       {
+           int         attnum = lfirsti(cur);
+
+           if (!intMember(attnum, attnumlist))
+               ereport(ERROR,
+                       (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                        errmsg("FORCE column \"%s\" not referenced by COPY",
+                               NameStr(attr[attnum - 1]->attname))));
+       }
+   }
+   
+   /*
+    * Check that LITERAL references valid COPY columns
+    */
+   if (literal)
+   {
+       List       *cur;
+       TupleDesc   tupDesc = RelationGetDescr(rel);
+       Form_pg_attribute *attr = tupDesc->attrs;
+
+       literal_atts = CopyGetAttnums(rel, literal);
+
+       foreach(cur, literal_atts)
+       {
+           int         attnum = lfirsti(cur);
+
+           if (!intMember(attnum, attnumlist))
+               ereport(ERROR,
+                       (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                        errmsg("LITERAL column \"%s\" not referenced by COPY",
+                               NameStr(attr[attnum - 1]->attname))));
+       }
+   }
+   
    /*
     * Set up variables to avoid per-attribute overhead.
     */
@@ -864,7 +1036,8 @@ DoCopy(const CopyStmt *stmt)
                         errmsg("\"%s\" is a directory", filename)));
            }
        }
-       CopyFrom(rel, attnumlist, binary, oids, delim, null_print);
+       CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
+                quote, escape, literal_atts);
    }
    else
    {                           /* copy from database to file */
@@ -926,7 +1099,8 @@ DoCopy(const CopyStmt *stmt)
                         errmsg("\"%s\" is a directory", filename)));
            }
        }
-       CopyTo(rel, attnumlist, binary, oids, delim, null_print);
+       CopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
+               quote, escape, force_atts);
    }
 
    if (!pipe)
@@ -958,7 +1132,8 @@ DoCopy(const CopyStmt *stmt)
  */
 static void
 CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
-      char *delim, char *null_print)
+      char *delim, char *null_print, bool csv_mode, char *quote,
+      char *escape, List *force_atts)
 {
    HeapTuple   tuple;
    TupleDesc   tupDesc;
@@ -967,6 +1142,7 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
    int         attr_count;
    Form_pg_attribute *attr;
    FmgrInfo   *out_functions;
+   bool       *force_quote;
    Oid        *elements;
    bool       *isvarlena;
    char       *string;
@@ -988,11 +1164,12 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
    out_functions = (FmgrInfo *) palloc((num_phys_attrs + 1) * sizeof(FmgrInfo));
    elements = (Oid *) palloc((num_phys_attrs + 1) * sizeof(Oid));
    isvarlena = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
+   force_quote = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
    foreach(cur, attnumlist)
    {
        int         attnum = lfirsti(cur);
        Oid         out_func_oid;
-
+       
        if (binary)
            getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
                                    &out_func_oid, &elements[attnum - 1],
@@ -1002,6 +1179,11 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
                              &out_func_oid, &elements[attnum - 1],
                              &isvarlena[attnum - 1]);
        fmgr_info(out_func_oid, &out_functions[attnum - 1]);
+
+       if (intMember(attnum, force_atts))
+           force_quote[attnum - 1] = true;
+       else
+           force_quote[attnum - 1] = false;
    }
 
    /*
@@ -1051,7 +1233,6 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
    while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
    {
        bool        need_delim = false;
-
        CHECK_FOR_INTERRUPTS();
 
        MemoryContextReset(mycontext);
@@ -1113,7 +1294,15 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
                                                           value,
                                  ObjectIdGetDatum(elements[attnum - 1]),
                            Int32GetDatum(attr[attnum - 1]->atttypmod)));
-                   CopyAttributeOut(string, delim);
+                   if (csv_mode)
+                   {
+                       CopyAttributeOutCSV(string, delim, quote, escape,
+                                           (strcmp(string, null_print) == 0 ||
+                                           force_quote[attnum - 1]));
+                   }
+                   else
+                       CopyAttributeOut(string, delim);
+
                }
                else
                {
@@ -1148,6 +1337,7 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
    pfree(out_functions);
    pfree(elements);
    pfree(isvarlena);
+   pfree(force_quote);
 }
 
 
@@ -1243,7 +1433,8 @@ limit_printout_length(StringInfo buf)
  */
 static void
 CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
-        char *delim, char *null_print)
+        char *delim, char *null_print, bool csv_mode, char *quote,
+        char *escape, List *literal_atts)
 {
    HeapTuple   tuple;
    TupleDesc   tupDesc;
@@ -1256,9 +1447,10 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
    Oid        *elements;
    Oid         oid_in_element;
    ExprState **constraintexprs;
+   bool       *literal_nullstr;
    bool        hasConstraints = false;
-   int         i;
    int         attnum;
+   int         i;
    List       *cur;
    Oid         in_func_oid;
    Datum      *values;
@@ -1317,6 +1509,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
    defmap = (int *) palloc((num_phys_attrs + 1) * sizeof(int));
    defexprs = (ExprState **) palloc((num_phys_attrs + 1) * sizeof(ExprState *));
    constraintexprs = (ExprState **) palloc0((num_phys_attrs + 1) * sizeof(ExprState *));
+   literal_nullstr = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
 
    for (attnum = 1; attnum <= num_phys_attrs; attnum++)
    {
@@ -1333,6 +1526,11 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
                             &in_func_oid, &elements[attnum - 1]);
        fmgr_info(in_func_oid, &in_functions[attnum - 1]);
 
+       if (intMember(attnum, literal_atts))
+           literal_nullstr[attnum - 1] = true;
+       else
+           literal_nullstr[attnum - 1] = false;
+       
        /* Get default info if needed */
        if (!intMember(attnum, attnumlist))
        {
@@ -1389,9 +1587,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
    ExecBSInsertTriggers(estate, resultRelInfo);
 
    if (!binary)
-   {
        file_has_oids = oids;   /* must rely on user to tell us this... */
-   }
    else
    {
        /* Read and verify binary header */
@@ -1500,6 +1696,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
 
            if (file_has_oids)
            {
+               /* can't be in CSV mode here */
                string = CopyReadAttribute(delim, null_print,
                                           &result, &isnull);
 
@@ -1538,14 +1735,27 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
                             errmsg("missing data for column \"%s\"",
                                    NameStr(attr[m]->attname))));
 
-               string = CopyReadAttribute(delim, null_print,
-                                          &result, &isnull);
-
-               if (isnull)
+               if (csv_mode)
                {
-                   /* we read an SQL NULL, no need to do anything */
+                   string = CopyReadAttributeCSV(delim, null_print, quote,
+                                                 escape, &result, &isnull);
+                   if (result == UNTERMINATED_FIELD)
+                       ereport(ERROR,
+                               (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+                                errmsg("unterminated CSV quoted field")));
                }
                else
+                   string = CopyReadAttribute(delim, null_print, 
+                                              &result, &isnull);
+
+               if (csv_mode && isnull && literal_nullstr[m])
+               {
+                   string = null_print;    /* set to NULL string */
+                   isnull = false;
+               }
+
+               /* we read an SQL NULL, no need to do anything */
+               if (!isnull)
                {
                    copy_attname = NameStr(attr[m]->attname);
                    values[m] = FunctionCall3(&in_functions[m],
@@ -1732,11 +1942,12 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
    pfree(values);
    pfree(nulls);
 
-   if (!binary)
-   {
-       pfree(in_functions);
-       pfree(elements);
-   }
+   pfree(in_functions);
+   pfree(elements);
+   pfree(defmap);
+   pfree(defexprs);
+   pfree(constraintexprs);
+   pfree(literal_nullstr);
 
    ExecDropTupleTable(tupleTable, true);
 
@@ -2070,6 +2281,152 @@ CopyReadAttribute(const char *delim, const char *null_print,
    return attribute_buf.data;
 }
 
+
+/*
+ * Read the value of a single attribute in CSV mode,
+ * performing de-escaping as needed. Escaping does not follow the normal
+ * PostgreSQL text mode, but instead "standard" (i.e. common) CSV usage.
+ *
+ * Quoted fields can span lines, in which case the line end is embedded
+ * in the returned string.
+ *
+ * delim, quote and escape are strings of which only the first character
+ * is used.  null_print is the null marker string; it is compared to the
+ * pre-de-escaped input string (thus if it is quoted it is not a NULL).
+ *
+ * *result is set to indicate what terminated the read:
+ *     NORMAL_ATTR:        column delimiter
+ *     END_OF_LINE:        end of line
+ *     UNTERMINATED_FIELD: no quote detected at end of a quoted field
+ *
+ * In any case, the string read up to the terminator (or end of file)
+ * is returned; it lives in attribute_buf, which is reset on each call.
+ *
+ * *isnull is set true or false depending on whether the input matched
+ * the null marker.  Note that the caller cannot check this since the
+ * returned string will be the post-de-escaping equivalent, which may
+ * look the same as some valid data string.
+ */
+
+static char *
+CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote,
+                    char *escape, CopyReadResult *result, bool *isnull)
+{
+   char        delimc = delim[0];
+   char        quotec = quote[0];
+   char        escapec = escape[0];
+   char        c;
+   int         start_cursor = line_buf.cursor;
+   int         end_cursor = start_cursor;
+   int         input_len;
+   bool        in_quote = false;
+   bool        saw_quote = false;
+
+   /* reset attribute_buf to empty */
+   attribute_buf.len = 0;
+   attribute_buf.data[0] = '\0';
+
+   /* set default status */
+   *result = END_OF_LINE;
+
+   for (;;)
+   {
+       /* handle multiline quoted fields */
+       if (in_quote && line_buf.cursor >= line_buf.len)
+       {
+           bool done;
+
+           switch(eol_type)
+           {
+               case EOL_NL:
+                   appendStringInfoString(&attribute_buf,"\n");
+                   break;
+               case EOL_CR:
+                   appendStringInfoString(&attribute_buf,"\r");
+                   break;
+               case EOL_CRNL:
+                   appendStringInfoString(&attribute_buf,"\r\n");
+                   break;
+               case EOL_UNKNOWN:
+                   /* shouldn't happen - just keep going */
+                   break;
+           }
+
+           copy_lineno++;
+           done = CopyReadLine();
+           if (done && line_buf.len == 0)
+               break;          /* still in_quote: reported below */
+           start_cursor = line_buf.cursor;
+       }
+
+       end_cursor = line_buf.cursor;
+       if (line_buf.cursor >= line_buf.len)
+           break;
+       c = line_buf.data[line_buf.cursor++];
+       /*
+        * unquoted field delimiter
+        */
+       if (!in_quote && c == delimc)
+       {
+           *result = NORMAL_ATTR;
+           break;
+       }
+       /*
+        * start of quoted field (or part of field)
+        */
+       if (!in_quote && c == quotec)
+       {
+           saw_quote = true;
+           in_quote = true;
+           continue;
+       }
+       /*
+        * escape within a quoted field
+        */
+       if (in_quote && c == escapec)
+       {
+           /*
+            * peek at the next char if available, and escape it if it
+            * is an escape char or a quote char
+            */
+           if (line_buf.cursor < line_buf.len)
+           {
+               char nextc = line_buf.data[line_buf.cursor];
+               if (nextc == escapec || nextc == quotec)
+               {
+                   appendStringInfoCharMacro(&attribute_buf, nextc);
+                   line_buf.cursor++;
+                   continue;
+               }
+           }
+       }
+       /*
+        * end of quoted field.
+        * Must do this test after testing for escape in case quote char
+        * and escape char are the same (which is the common case).
+        */
+       if (in_quote && c == quotec)
+       {
+           in_quote = false;
+           continue;
+       }
+       appendStringInfoCharMacro(&attribute_buf, c);
+   }
+
+   if (in_quote)
+       *result = UNTERMINATED_FIELD;
+
+   /* check whether raw input matched null marker */
+   input_len = end_cursor - start_cursor;
+   if (!saw_quote && input_len == (int) strlen(null_print) &&
+       strncmp(&line_buf.data[start_cursor], null_print, input_len) == 0)
+       *isnull = true;
+   else
+       *isnull = false;
+
+   return attribute_buf.data;
+}
+
 /*
  * Read a binary attribute
  */
@@ -2195,6 +2552,73 @@ CopyAttributeOut(char *server_string, char *delim)
    }
 }
 
+/*
+ * Send CSV representation of one attribute, with conversion and
+ * CSV type escaping.
+ *
+ * Only the first byte of delim, quote and escape is used.  The value is
+ * wrapped in quotes if force_quote is set or if it contains the delimiter,
+ * the quote character, '\n' or '\r'.  Quote and escape characters are
+ * escaped only inside a quoted value: the CSV reader honors the escape
+ * character only within quotes, so an unquoted value is sent verbatim.
+ */
+static void
+CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
+                   char *escape, bool force_quote)
+{
+   char       *string;
+   char        c;
+   char        delimc = delim[0];
+   char        quotec = quote[0];
+   char        escapec = escape[0];
+   bool        need_quote = force_quote;
+   char       *test_string;
+   bool        same_encoding;
+   int         mblen;
+   int         i;
+
+   same_encoding = (server_encoding == client_encoding);
+   if (!same_encoding)
+       string = (char *) pg_server_to_client((unsigned char *) server_string,
+                                             strlen(server_string));
+   else
+       string = server_string;
+
+   /*
+    * Run through the string twice: first to see if it needs quoting,
+    * second to actually send it.
+    */
+   for (test_string = string;
+        !need_quote && (c = *test_string) != '\0';
+        test_string += mblen)
+   {
+       if (c == delimc || c == quotec || c == '\n' || c == '\r')
+           need_quote = true;
+       mblen = same_encoding ? 1 :
+           pg_encoding_mblen(client_encoding, test_string);
+   }
+
+   if (need_quote)
+       CopySendChar(quotec);
+
+   for (; (c = *string) != '\0'; string += mblen)
+   {
+       /* escaping an unquoted value would not be undone on reload */
+       if (need_quote && (c == quotec || c == escapec))
+           CopySendChar(escapec);
+       CopySendChar(c);
+
+       /* send additional bytes of the char, if any */
+       mblen = same_encoding ? 1 :
+           pg_encoding_mblen(client_encoding, string);
+       for (i = 1; i < mblen; i++)
+           CopySendChar(string[i]);
+   }
+
+   if (need_quote)
+       CopySendChar(quotec);
+}
+
 /*
  * CopyGetAttnums - build an integer list of attnums to be copied
  *
index 84efc2875eee44c7dd043fb06a6285839dcdb425..461a39dbb706c183d3c6b1feaf0286a15bf284db 100644 (file)
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.450 2004/04/05 03:07:26 momjian Exp $
+ *   $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.451 2004/04/19 17:22:30 momjian Exp $
  *
  * HISTORY
  *   AUTHOR            DATE            MAJOR EVENT
@@ -343,7 +343,7 @@ static void doNegateFloat(Value *v);
    CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
    CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT
    COMMITTED CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB
-   CREATEUSER CROSS CURRENT_DATE CURRENT_TIME
+   CREATEUSER CROSS CSV CURRENT_DATE CURRENT_TIME
    CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
 
    DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
@@ -370,7 +370,7 @@ static void doNegateFloat(Value *v);
    KEY
 
    LANCOMPILER LANGUAGE LARGE_P LAST_P LEADING LEFT LEVEL LIKE LIMIT
-   LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
+   LISTEN LITERAL LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
    LOCK_P
 
    MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
@@ -386,6 +386,8 @@ static void doNegateFloat(Value *v);
    PRECISION PRESERVE PREPARE PRIMARY 
    PRIOR PRIVILEGES PROCEDURAL PROCEDURE
 
+   QUOTE
+
    READ REAL RECHECK REFERENCES REINDEX RELATIVE_P RENAME REPEATABLE REPLACE
    RESET RESTART RESTRICT RETURNS REVOKE RIGHT ROLLBACK ROW ROWS
    RULE
@@ -1360,6 +1362,26 @@ copy_opt_item:
                {
                    $$ = makeDefElem("null", (Node *)makeString($3));
                }
+           | CSV
+               {
+                   $$ = makeDefElem("csv", (Node *)makeInteger(TRUE));
+               }
+           | QUOTE opt_as Sconst
+               {
+                   $$ = makeDefElem("quote", (Node *)makeString($3));
+               }
+           | ESCAPE opt_as Sconst
+               {
+                   $$ = makeDefElem("escape", (Node *)makeString($3));
+               }
+           | FORCE columnList
+               {
+                   $$ = makeDefElem("force", (Node *)$2);
+               }
+           | LITERAL columnList
+               {
+                   $$ = makeDefElem("literal", (Node *)$2);
+               }
        ;
 
 /* The following exist for backward compatibility */
@@ -7420,6 +7442,7 @@ unreserved_keyword:
            | COPY
            | CREATEDB
            | CREATEUSER
+           | CSV
            | CURSOR
            | CYCLE
            | DATABASE
@@ -7473,6 +7496,7 @@ unreserved_keyword:
            | LAST_P
            | LEVEL
            | LISTEN
+           | LITERAL
            | LOAD
            | LOCAL
            | LOCATION
@@ -7507,6 +7531,7 @@ unreserved_keyword:
            | PRIVILEGES
            | PROCEDURAL
            | PROCEDURE
+           | QUOTE
            | READ
            | RECHECK
            | REINDEX
index 54ac767126f01f22789d62516d59ef1713c44671..613d22ac30c421a256b6064093c72abca190c2fc 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.147 2004/03/11 01:47:40 ishii Exp $
+ *   $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.148 2004/04/19 17:22:31 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -90,6 +90,7 @@ static const ScanKeyword ScanKeywords[] = {
    {"createdb", CREATEDB},
    {"createuser", CREATEUSER},
    {"cross", CROSS},
+   {"csv", CSV},
    {"current_date", CURRENT_DATE},
    {"current_time", CURRENT_TIME},
    {"current_timestamp", CURRENT_TIMESTAMP},
@@ -186,6 +187,7 @@ static const ScanKeyword ScanKeywords[] = {
    {"like", LIKE},
    {"limit", LIMIT},
    {"listen", LISTEN},
+   {"literal", LITERAL},
    {"load", LOAD},
    {"local", LOCAL},
    {"localtime", LOCALTIME},
@@ -248,6 +250,7 @@ static const ScanKeyword ScanKeywords[] = {
    {"privileges", PRIVILEGES},
    {"procedural", PROCEDURAL},
    {"procedure", PROCEDURE},
+   {"quote", QUOTE},
    {"read", READ},
    {"real", REAL},
    {"recheck", RECHECK},
index 856a9e8589e75401de264b18bd215075d9a3f531..a0eda360c4e86b387840205f0d7abc4b0feb1959 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/tcop/fastpath.c,v 1.71 2004/01/07 18:56:27 neilc Exp $
+ *   $PostgreSQL: pgsql/src/backend/tcop/fastpath.c,v 1.72 2004/04/19 17:22:31 momjian Exp $
  *
  * NOTES
  *   This cruft is the server side of PQfn.
@@ -154,8 +154,7 @@ SendFunctionResult(Datum retval, bool isnull, Oid rettype, int16 format)
            bool        typisvarlena;
            char       *outputstr;
 
-           getTypeOutputInfo(rettype,
-                             &typoutput, &typelem, &typisvarlena);
+           getTypeOutputInfo(rettype, &typoutput, &typelem, &typisvarlena);
            outputstr = DatumGetCString(OidFunctionCall3(typoutput,
                                                         retval,
                                               ObjectIdGetDatum(typelem),
index 49b8b8a064d6e0161589b1f34aa70e9aefe08a20..e01afb821423491b989d0c754faa7adc01509155 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2003, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.43 2004/04/12 15:58:52 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.44 2004/04/19 17:22:31 momjian Exp $
  */
 #include "postgres_fe.h"
 #include "copy.h"
@@ -66,8 +66,13 @@ struct copy_options
    bool        from;
    bool        binary;
    bool        oids;
+   bool        csv_mode;
    char       *delim;
    char       *null;
+   char       *quote;
+   char       *escape;
+   char       *force_list;
+   char       *literal_list;
 };
 
 
@@ -81,6 +86,10 @@ free_copy_options(struct copy_options * ptr)
    free(ptr->file);
    free(ptr->delim);
    free(ptr->null);
+   free(ptr->quote);
+   free(ptr->escape);
+   free(ptr->force_list);
+   free(ptr->literal_list);
    free(ptr);
 }
 
@@ -272,11 +281,19 @@ parse_slash_copy(const char *args)
 
        while (token)
        {
+           bool fetch_next;
+
+           fetch_next = true;
+           
            /* someday allow BINARY here */
            if (strcasecmp(token, "oids") == 0)
            {
                result->oids = true;
            }
+           else if (strcasecmp(token, "csv") == 0)
+           {
+               result->csv_mode = true;
+           }
            else if (strcasecmp(token, "delimiter") == 0)
            {
                token = strtokx(NULL, whitespace, NULL, "'",
@@ -301,11 +318,78 @@ parse_slash_copy(const char *args)
                else
                    goto error;
            }
+           else if (strcasecmp(token, "quote") == 0)
+           {
+               token = strtokx(NULL, whitespace, NULL, "'",
+                               '\\', false, pset.encoding);
+               if (token && strcasecmp(token, "as") == 0)
+                   token = strtokx(NULL, whitespace, NULL, "'",
+                                   '\\', false, pset.encoding);
+               if (token)
+                   result->quote = pg_strdup(token);
+               else
+                   goto error;
+           }
+           else if (strcasecmp(token, "escape") == 0)
+           {
+               token = strtokx(NULL, whitespace, NULL, "'",
+                               '\\', false, pset.encoding);
+               if (token && strcasecmp(token, "as") == 0)
+                   token = strtokx(NULL, whitespace, NULL, "'",
+                                   '\\', false, pset.encoding);
+               if (token)
+                   result->escape = pg_strdup(token);
+               else
+                   goto error;
+           }
+           else if (strcasecmp(token, "force") == 0)
+           {
+               /* handle column list */
+               fetch_next = false;
+               for (;;)
+               {
+                   token = strtokx(NULL, whitespace, ",", "\"",
+                                   0, false, pset.encoding);
+                   if (!token || strchr(",", token[0]))
+                       goto error;
+                   if (!result->force_list)
+                       result->force_list = pg_strdup(token);
+                   else
+                       xstrcat(&result->force_list, token);
+                   token = strtokx(NULL, whitespace, ",", "\"",
+                                   0, false, pset.encoding);
+                   if (!token || token[0] != ',')
+                       break;
+                   xstrcat(&result->force_list, token);
+               }
+           }
+           else if (strcasecmp(token, "literal") == 0)
+           {
+               /* handle column list */
+               fetch_next = false;
+               for (;;)
+               {
+                   token = strtokx(NULL, whitespace, ",", "\"",
+                                   0, false, pset.encoding);
+                   if (!token || strchr(",", token[0]))
+                       goto error;
+                   if (!result->literal_list)
+                       result->literal_list = pg_strdup(token);
+                   else
+                       xstrcat(&result->literal_list, token);
+                   token = strtokx(NULL, whitespace, ",", "\"",
+                                   0, false, pset.encoding);
+                   if (!token || token[0] != ',')
+                       break;
+                   xstrcat(&result->literal_list, token);
+               }
+           }
            else
                goto error;
 
-           token = strtokx(NULL, whitespace, NULL, NULL,
-                           0, false, pset.encoding);
+           if (fetch_next)
+               token = strtokx(NULL, whitespace, NULL, NULL,
+                               0, false, pset.encoding);
        }
    }
 
@@ -340,7 +424,7 @@ do_copy(const char *args)
    PGresult   *result;
    bool        success;
    struct stat st;
-
+   
    /* parse options */
    options = parse_slash_copy(args);
 
@@ -379,6 +463,7 @@ do_copy(const char *args)
                              options->delim);
    }
 
+   /* There is no backward-compatible CSV syntax */
    if (options->null)
    {
        if (options->null[0] == '\'')
@@ -387,6 +472,37 @@ do_copy(const char *args)
            appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null);
    }
 
+   if (options->csv_mode)
+   {
+       appendPQExpBuffer(&query, " CSV");
+   }
+   
+   if (options->quote)
+   {
+       if (options->quote[0] == '\'')
+           appendPQExpBuffer(&query, " QUOTE AS %s", options->quote);
+       else
+           appendPQExpBuffer(&query, " QUOTE AS '%s'", options->quote);
+   }
+
+   if (options->escape)
+   {
+       if (options->escape[0] == '\'')
+           appendPQExpBuffer(&query, " ESCAPE AS %s", options->escape);
+       else
+           appendPQExpBuffer(&query, " ESCAPE AS '%s'", options->escape);
+   }
+
+   if (options->force_list)
+   {
+       appendPQExpBuffer(&query, " FORCE %s", options->force_list);
+   }
+
+   if (options->literal_list)
+   {
+       appendPQExpBuffer(&query, " LITERAL %s", options->literal_list);
+   }
+
    if (options->from)
    {
        if (options->file)