diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/bin/psql/common.c | 28 | ||||
-rw-r--r-- | src/bin/psql/psqlscanslash.l | 4 | ||||
-rw-r--r-- | src/bin/psql/stringutils.c | 8 | ||||
-rw-r--r-- | src/bin/psql/tab-complete.c | 4 | ||||
-rw-r--r-- | src/bin/scripts/common.c | 4 | ||||
-rw-r--r-- | src/common/jsonapi.c | 6 | ||||
-rw-r--r-- | src/common/wchar.c | 15 | ||||
-rw-r--r-- | src/fe_utils/print.c | 3 | ||||
-rw-r--r-- | src/include/mb/pg_wchar.h | 1 | ||||
-rw-r--r-- | src/interfaces/libpq/fe-print.c | 3 | ||||
-rw-r--r-- | src/interfaces/libpq/fe-protocol3.c | 6 |
11 files changed, 57 insertions, 25 deletions
diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index 10314288499..94d5d60f879 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -29,6 +29,8 @@ #include "portability/instr_time.h" #include "settings.h" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + static bool DescribeQuery(const char *query, double *elapsed_msec); static bool ExecQueryUsingCursor(const char *query, double *elapsed_msec); static bool command_no_begin(const char *query); @@ -1842,7 +1844,7 @@ skip_white_space(const char *query) while (*query) { - int mblen = PQmblen(query, pset.encoding); + int mblen = PQmblenBounded(query, pset.encoding); /* * Note: we assume the encoding is a superset of ASCII, so that for @@ -1879,7 +1881,7 @@ skip_white_space(const char *query) query++; break; } - query += PQmblen(query, pset.encoding); + query += PQmblenBounded(query, pset.encoding); } } else if (cnestlevel > 0) @@ -1914,7 +1916,7 @@ command_no_begin(const char *query) */ wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); /* * Transaction control commands. These should include every keyword that @@ -1945,7 +1947,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 11 && pg_strncasecmp(query, "transaction", 11) == 0) return true; @@ -1979,7 +1981,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0) return true; @@ -1995,7 +1997,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); } if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0) @@ -2006,7 +2008,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) return true; @@ -2023,7 +2025,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); /* ALTER SYSTEM isn't allowed in xacts */ if (wordlen == 6 && pg_strncasecmp(query, "system", 6) == 0) @@ -2046,7 +2048,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0) return true; @@ -2061,7 +2063,7 @@ command_no_begin(const char *query) query = skip_white_space(query); wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); /* * REINDEX [ TABLE | INDEX ] CONCURRENTLY are not allowed in @@ -2080,7 +2082,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) return true; @@ -2100,7 +2102,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 3 && pg_strncasecmp(query, "all", 3) == 0) return true; @@ -2136,7 +2138,7 @@ is_select_command(const char *query) */ wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 6 && pg_strncasecmp(query, "select", 6) == 0) return true; diff --git a/src/bin/psql/psqlscanslash.l b/src/bin/psql/psqlscanslash.l index 4dff84d6271..390dc6c5c90 100644 --- a/src/bin/psql/psqlscanslash.l +++ b/src/bin/psql/psqlscanslash.l @@ -28,6 +28,8 @@ %{ #include "fe_utils/psqlscan_int.h" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + /* * We must have a typedef YYSTYPE for yylex's first argument, but this lexer * doesn't presently make use of that argument, so just declare it as int. @@ -753,7 +755,7 @@ dequote_downcase_identifier(char *str, bool downcase, int encoding) { if (downcase && !inquotes) *cp = pg_tolower((unsigned char) *cp); - cp += PQmblen(cp, encoding); + cp += PQmblenBounded(cp, encoding); } } } diff --git a/src/bin/psql/stringutils.c b/src/bin/psql/stringutils.c index c521749661c..c96b2fe1965 100644 --- a/src/bin/psql/stringutils.c +++ b/src/bin/psql/stringutils.c @@ -12,6 +12,8 @@ #include "common.h" #include "stringutils.h" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + /* * Replacement for strtok() (a.k.a. poor man's flex) @@ -143,7 +145,7 @@ strtokx(const char *s, /* okay, we have a quoted token, now scan for the closer */ char thisquote = *p++; - for (; *p; p += PQmblen(p, encoding)) + for (; *p; p += PQmblenBounded(p, encoding)) { if (*p == escape && p[1] != '\0') p++; /* process escaped anything */ @@ -262,7 +264,7 @@ strip_quotes(char *source, char quote, char escape, int encoding) else if (c == escape && src[1] != '\0') src++; /* process escaped character */ - i = PQmblen(src, encoding); + i = PQmblenBounded(src, encoding); while (i--) *dst++ = *src++; } @@ -324,7 +326,7 @@ quote_if_needed(const char *source, const char *entails_quote, else if (strchr(entails_quote, c)) need_quotes = true; - i = PQmblen(src, encoding); + i = PQmblenBounded(src, encoding); while (i--) *dst++ = *src++; } diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index eb018854a5c..4f46cd949d9 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -73,6 +73,8 @@ #define USE_FILENAME_QUOTING_FUNCTIONS 1 #endif +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + /* word break characters */ #define WORD_BREAKS "\t\n@$><=;|&{() " @@ -4140,7 +4142,7 @@ _complete_from_query(const char *simple_query, while (*pstr) { char_length++; - pstr += PQmblen(pstr, pset.encoding); + pstr += PQmblenBounded(pstr, pset.encoding); } /* Free any prior result */ diff --git a/src/bin/scripts/common.c b/src/bin/scripts/common.c index afa62e4da73..d446d7af9fa 100644 --- a/src/bin/scripts/common.c +++ b/src/bin/scripts/common.c @@ -25,6 +25,8 @@ #define ERRCODE_UNDEFINED_TABLE "42P01" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + /* * Provide strictly harmonized handling of --help and --version * options. @@ -368,7 +370,7 @@ splitTableColumnsSpec(const char *spec, int encoding, cp++; } else - cp += PQmblen(cp, encoding); + cp += PQmblenBounded(cp, encoding); } *table = pnstrdup(spec, cp - spec); *columns = cp; diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index 9326f805366..6fe17a3d378 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -738,7 +738,7 @@ json_lex_string(JsonLexContext *lex) ch = (ch * 16) + (*s - 'A') + 10; else { - lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); + lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s); return JSON_UNICODE_ESCAPE_FORMAT; } } @@ -844,7 +844,7 @@ json_lex_string(JsonLexContext *lex) default: /* Not a valid string escape, so signal error. */ lex->token_start = s; - lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); + lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s); return JSON_ESCAPING_INVALID; } } @@ -858,7 +858,7 @@ json_lex_string(JsonLexContext *lex) * shown it's not a performance win. */ lex->token_start = s; - lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); + lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s); return JSON_ESCAPING_INVALID; } diff --git a/src/common/wchar.c b/src/common/wchar.c index efaf1c155bb..08734802236 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -1549,6 +1549,11 @@ const pg_wchar_tbl pg_wchar_table[] = { /* * Returns the byte length of a multibyte character. + * + * Caution: when dealing with text that is not certainly valid in the + * specified encoding, the result may exceed the actual remaining + * string length. Callers that are not prepared to deal with that + * should use pg_encoding_mblen_bounded() instead. */ int pg_encoding_mblen(int encoding, const char *mbstr) @@ -1559,6 +1564,16 @@ pg_encoding_mblen(int encoding, const char *mbstr) } /* + * Returns the byte length of a multibyte character; but not more than + * the distance to end of string. + */ +int +pg_encoding_mblen_bounded(int encoding, const char *mbstr) +{ + return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr)); +} + +/* * Returns the display length of a multibyte character. */ int diff --git a/src/fe_utils/print.c b/src/fe_utils/print.c index f3c176aa555..966a7721801 100644 --- a/src/fe_utils/print.c +++ b/src/fe_utils/print.c @@ -3652,6 +3652,9 @@ strlen_max_width(unsigned char *str, int *target_width, int encoding) curr_width += char_width; str += PQmblen((char *) str, encoding); + + if (str > end) /* Don't overrun invalid string */ + str = end; } *target_width = curr_width; diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 494aefc7fab..b43454b64ce 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -553,6 +553,7 @@ extern int pg_valid_server_encoding_id(int encoding); * earlier in this file are also available from libpgcommon. */ extern int pg_encoding_mblen(int encoding, const char *mbstr); +extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr); extern int pg_encoding_dsplen(int encoding, const char *mbstr); extern int pg_encoding_verifymb(int encoding, const char *mbstr, int len); extern int pg_encoding_max_length(int encoding); diff --git a/src/interfaces/libpq/fe-print.c b/src/interfaces/libpq/fe-print.c index 784eaae48e5..61ba4e4ce7b 100644 --- a/src/interfaces/libpq/fe-print.c +++ b/src/interfaces/libpq/fe-print.c @@ -36,6 +36,7 @@ #include "libpq-fe.h" #include "libpq-int.h" +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) static void do_field(const PQprintOpt *po, const PGresult *res, const int i, const int j, const int fs_len, @@ -365,7 +366,7 @@ do_field(const PQprintOpt *po, const PGresult *res, /* Detect whether field contains non-numeric data */ char ch = '0'; - for (p = pval; *p; p += PQmblen(p, res->client_encoding)) + for (p = pval; *p; p += PQmblenBounded(p, res->client_encoding)) { ch = *p; if (!((ch >= '0' && ch <= '9') || diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 686299dce91..04fd4ba874b 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -39,6 +39,8 @@ ((id) == 'T' || (id) == 'D' || (id) == 'd' || (id) == 'V' || \ (id) == 'E' || (id) == 'N' || (id) == 'A') +#define PQmblenBounded(s, e) strnlen(s, PQmblen(s, e)) + static void handleSyncLoss(PGconn *conn, char id, int msgLength); static int getRowDescriptions(PGconn *conn, int msgLength); @@ -1241,7 +1243,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding) if (w <= 0) w = 1; scroffset += w; - qoffset += pg_encoding_mblen(encoding, &wquery[qoffset]); + qoffset += PQmblenBounded(&wquery[qoffset], encoding); } else { @@ -1309,7 +1311,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding) * width. */ scroffset = 0; - for (; i < msg->len; i += pg_encoding_mblen(encoding, &msg->data[i])) + for (; i < msg->len; i += PQmblenBounded(&msg->data[i], encoding)) { int w = pg_encoding_dsplen(encoding, &msg->data[i]); |