From eb0a7735ba1ede6a35b80d73f6c371a8b1220552 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Wed, 12 Sep 2007 20:49:27 +0000 Subject: [PATCH] Perform post-escaping encoding validity checks on SQL literals and COPY input so that invalidly encoded data cannot enter the database by these means. --- src/backend/commands/copy.c | 19 +++++++++++++++++-- src/backend/parser/scan.l | 22 +++++++++++++++++++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index d28a6ad11c..fdfe5ea965 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.286 2007/09/07 20:59:26 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.287 2007/09/12 20:49:27 adunstan Exp $ * *------------------------------------------------------------------------- */ @@ -2685,6 +2685,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) char *start_ptr; char *end_ptr; int input_len; + bool saw_high_bit = false; /* Make sure space remains in fieldvals[] */ if (fieldno >= maxfields) @@ -2749,6 +2750,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) } } c = val & 0377; + if (IS_HIGHBIT_SET(c)) + saw_high_bit = true; } break; case 'x': @@ -2772,6 +2775,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) } } c = val & 0xff; + if (IS_HIGHBIT_SET(c)) + saw_high_bit = true; } } break; @@ -2799,7 +2804,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) * literally */ } - } + } /* Add c to output string */ *output_ptr++ = c; @@ -2808,6 +2813,16 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) /* Terminate attribute value in output area */ *output_ptr++ = '\0'; + /* If we de-escaped a char with the high bit set, make sure + * we still have valid data for the db encoding. Avoid calling strlen + * here for the sake of efficiency. + */ + if (saw_high_bit) + { + char *fld = fieldvals[fieldno]; + pg_verifymbstr(fld, output_ptr - (fld + 1), false); + } + /* Check whether raw input matched null marker */ input_len = end_ptr - start_ptr; if (input_len == cstate->null_print_len && diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index baa5992277..a138a66131 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -24,7 +24,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.140 2007/08/12 20:18:06 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.141 2007/09/12 20:49:27 adunstan Exp $ * *------------------------------------------------------------------------- */ @@ -60,6 +60,7 @@ bool escape_string_warning = true; bool standard_conforming_strings = false; static bool warn_on_first_escape; +static bool saw_high_bit = false; /* * literalbuf is used to accumulate literal values when multiple rules @@ -426,6 +427,7 @@ other . {xqstart} { warn_on_first_escape = true; + saw_high_bit = false; SET_YYLLOC(); if (standard_conforming_strings) BEGIN(xq); @@ -435,6 +437,7 @@ other . } {xestart} { warn_on_first_escape = false; + saw_high_bit = false; SET_YYLLOC(); BEGIN(xe); startlit(); @@ -443,6 +446,11 @@ other . {quotefail} { yyless(1); BEGIN(INITIAL); + /* check that the data remains valid if it might have been + * made invalid by unescaping any chars. + */ + if (saw_high_bit) + pg_verifymbstr(literalbuf, literallen, false); yylval.str = litbufdup(); return SCONST; } @@ -475,12 +483,16 @@ other . check_escape_warning(); addlitchar(c); + if (IS_HIGHBIT_SET(c)) + saw_high_bit = true; } {xehexesc} { unsigned char c = strtoul(yytext+2, NULL, 16); check_escape_warning(); addlitchar(c); + if (IS_HIGHBIT_SET(c)) + saw_high_bit = true; } {quotecontinue} { /* ignore */ @@ -892,6 +904,14 @@ litbufdup(void) static unsigned char unescape_single_char(unsigned char c) { + /* Normally we wouldn't expect to see \n where n has its high bit set + * but we set the flag to check the string if we do get it, so + * that this doesn't become a way of getting around the coding validity + * checks. + */ + if (IS_HIGHBIT_SET(c)) + saw_high_bit = true; + switch (c) { case 'b': -- 2.39.5