Revert COPY OUT to follow the pre-8.3 handling of ASCII control characters,
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 3 Dec 2007 00:03:05 +0000 (00:03 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 3 Dec 2007 00:03:05 +0000 (00:03 +0000)
namely that \r, \n, \t, \b, \f, \v are dumped as those two-character
representations rather than a backslash and the literal control character.
I had made it do the latter to save some code, but this was ill-advised,
because dump files in which these characters appear literally are prone to
newline mangling.  Fortunately, doing it the old way should only cost a few
more lines of code, and not slow down the copy loop materially.
Per bug #3795 from Lou Duchez.

src/backend/commands/copy.c

index c68d828fea0337b2da732a88981ad8073508eb77..55ecf0098d4ce504d3e3c9a9903eff14b680a56c 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.289 2007/11/30 21:22:53 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.290 2007/12/03 00:03:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3102,27 +3102,43 @@ CopyAttributeOutText(CopyState cstate, char *string)
            }
            else if ((unsigned char) c < (unsigned char) 0x20)
            {
+               /*
+                * \r and \n must be escaped, the others are traditional.
+                * We prefer to dump these using the C-like notation, rather
+                * than a backslash and the literal character, because it
+                * makes the dump file a bit more proof against Microsoftish
+                * data mangling.
+                */
                switch (c)
                {
-                       /*
-                        * \r and \n must be escaped, the others are
-                        * traditional
-                        */
                    case '\b':
+                       c = 'b';
+                       break;
                    case '\f':
+                       c = 'f';
+                       break;
                    case '\n':
+                       c = 'n';
+                       break;
                    case '\r':
+                       c = 'r';
+                       break;
                    case '\t':
+                       c = 't';
+                       break;
                    case '\v':
-                       DUMPSOFAR();
-                       CopySendChar(cstate, '\\');
-                       start = ptr++;  /* we include char in next run */
+                       c = 'v';
                        break;
                    default:
                        /* All ASCII control chars are length 1 */
                        ptr++;
-                       break;
+                       continue;       /* fall to end of loop */
                }
+               /* if we get here, we need to convert the control char */
+               DUMPSOFAR();
+               CopySendChar(cstate, '\\');
+               CopySendChar(cstate, c);
+               start = ++ptr;          /* do not include char in next run */
            }
            else if (IS_HIGHBIT_SET(c))
                ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
@@ -3143,27 +3159,43 @@ CopyAttributeOutText(CopyState cstate, char *string)
            }
            else if ((unsigned char) c < (unsigned char) 0x20)
            {
+               /*
+                * \r and \n must be escaped, the others are traditional.
+                * We prefer to dump these using the C-like notation, rather
+                * than a backslash and the literal character, because it
+                * makes the dump file a bit more proof against Microsoftish
+                * data mangling.
+                */
                switch (c)
                {
-                       /*
-                        * \r and \n must be escaped, the others are
-                        * traditional
-                        */
                    case '\b':
+                       c = 'b';
+                       break;
                    case '\f':
+                       c = 'f';
+                       break;
                    case '\n':
+                       c = 'n';
+                       break;
                    case '\r':
+                       c = 'r';
+                       break;
                    case '\t':
+                       c = 't';
+                       break;
                    case '\v':
-                       DUMPSOFAR();
-                       CopySendChar(cstate, '\\');
-                       start = ptr++;  /* we include char in next run */
+                       c = 'v';
                        break;
                    default:
                        /* All ASCII control chars are length 1 */
                        ptr++;
-                       break;
+                       continue;       /* fall to end of loop */
                }
+               /* if we get here, we need to convert the control char */
+               DUMPSOFAR();
+               CopySendChar(cstate, '\\');
+               CopySendChar(cstate, c);
+               start = ++ptr;          /* do not include char in next run */
            }
            else
                ptr++;