summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
Diffstat (limited to 'src/test')
-rw-r--r--src/test/modules/test_escape/test_escape.c96
-rw-r--r--src/test/regress/expected/conversion.out13
-rw-r--r--src/test/regress/sql/conversion.sql7
3 files changed, 110 insertions, 6 deletions
diff --git a/src/test/modules/test_escape/test_escape.c b/src/test/modules/test_escape/test_escape.c
index ffd9d7166fa..59430ed46c4 100644
--- a/src/test/modules/test_escape/test_escape.c
+++ b/src/test/modules/test_escape/test_escape.c
@@ -12,6 +12,7 @@
#include <string.h>
#include <stdio.h>
+#include "common/jsonapi.h"
#include "fe_utils/psqlscan.h"
#include "fe_utils/string_utils.h"
#include "getopt_long.h"
@@ -164,6 +165,88 @@ encoding_conflicts_ascii(int encoding)
}
+/*
+ * Confirm escaping doesn't read past the end of an allocation. Consider the
+ * result of malloc(4096), in the absence of freelist entries satisfying the
+ * allocation. On OpenBSD, reading one byte past the end of that object
+ * yields SIGSEGV.
+ *
+ * Run this test before the program's other tests, so freelists are minimal.
+ * len=4096 didn't SIGSEGV, likely due to free() calls in libpq. len=8192
+ * did. Use 128 KiB, to somewhat insulate the outcome from distant new free()
+ * calls and libc changes.
+ *
+ * The input is input_len - 1 '-' bytes followed by a single 0xfe byte, with
+ * no terminating NUL, so the 0xfe is the last byte requested from
+ * pg_malloc(). The whole buffer is handed to PQescapeLiteral() under the
+ * GB18030 client encoding; the condition reported is that the call returned
+ * NULL.
+ *
+ * tc: test configuration. tc->conn is the connection whose client encoding
+ * is switched to GB18030 and which is passed to PQescapeLiteral(). The client
+ * encoding is not restored before returning.
+ */
+static void
+test_gb18030_page_multiple(pe_test_config *tc)
+{
+ PQExpBuffer testname;
+ size_t input_len = 0x20000;
+ char *input;
+
+ /*
+  * prepare input: fill all but the final byte with '-', then store a lone
+  * 0xfe in the final byte so that nothing follows it inside the buffer
+  */
+ input = pg_malloc(input_len);
+ memset(input, '-', input_len - 1);
+ input[input_len - 1] = 0xfe;
+
+ /*
+  * name to describe the test: ">repeat(<filler char>, <filler count>)",
+  * then the final byte rendered by escapify(), then a fixed suffix naming
+  * the encoding and the function under test
+  */
+ testname = createPQExpBuffer();
+ appendPQExpBuffer(testname, ">repeat(%c, %zu)", input[0], input_len - 1);
+ escapify(testname, input + input_len - 1, 1);
+ appendPQExpBuffer(testname, "< - GB18030 - PQescapeLiteral");
+
+ /*
+  * test itself: the length is passed explicitly because the buffer is not
+  * NUL-terminated.
+  *
+  * NOTE(review): the PQescapeLiteral() result is only compared with NULL;
+  * if it were ever non-NULL, nothing here frees it.
+  */
+ PQsetClientEncoding(tc->conn, "GB18030");
+ report_result(tc, PQescapeLiteral(tc->conn, input, input_len) == NULL,
+ testname->data, "",
+ "input validity vs escape success", "ok");
+
+ destroyPQExpBuffer(testname);
+ pg_free(input);
+}
+
+/*
+ * Confirm json parsing doesn't read past the end of an allocation. This
+ * exercises wchar.c infrastructure like the true "escape" tests do, but this
+ * isn't an "escape" test.
+ *
+ * The input is five bytes: an opening brace, a double quote, a backslash,
+ * the letter u, and 0xFE. input_len is sizeof(input) - 1, so the literal's
+ * terminating NUL is not part of the input. The parser is given exactly
+ * input_len bytes with encoding PG_GB18030, and the condition reported is
+ * that pg_parse_json() returned JSON_UNICODE_ESCAPE_FORMAT.
+ *
+ * tc: test configuration, passed through to report_result().
+ */
+static void
+test_gb18030_json(pe_test_config *tc)
+{
+ PQExpBuffer raw_buf;
+ PQExpBuffer testname;
+ const char input[] = "{\"\\u\xFE";
+ size_t input_len = sizeof(input) - 1;
+ JsonLexContext *lex;
+ JsonSemAction sem = {0}; /* no callbacks */
+ JsonParseErrorType json_error;
+
+ /*
+  * prepare input like test_one_vector_escape() does: copy the input bytes
+  * into a buffer, append NEVER_ACCESS_STR after them, and tell Valgrind
+  * that everything from offset input_len to the end of the buffer contents
+  * must not be accessed
+  */
+ raw_buf = createPQExpBuffer();
+ appendBinaryPQExpBuffer(raw_buf, input, input_len);
+ appendPQExpBufferStr(raw_buf, NEVER_ACCESS_STR);
+ VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[input_len],
+ raw_buf->len - input_len);
+
+ /*
+  * name to describe the test: the input rendered by escapify() between
+  * '>' and '<', then a fixed suffix naming the encoding and the function
+  * under test
+  */
+ testname = createPQExpBuffer();
+ appendPQExpBuffer(testname, ">");
+ escapify(testname, input, input_len);
+ appendPQExpBuffer(testname, "< - GB18030 - pg_parse_json");
+
+ /*
+  * test itself: parse only the first input_len bytes of raw_buf with a
+  * semantic-action struct that has no callbacks set; the error detail from
+  * json_errdetail() is handed to report_result() along with the outcome
+  */
+ lex = makeJsonLexContextCstringLen(NULL, raw_buf->data, input_len,
+ PG_GB18030, false);
+ json_error = pg_parse_json(lex, &sem);
+ report_result(tc, json_error == JSON_UNICODE_ESCAPE_FORMAT,
+ testname->data, "",
+ "diagnosed", json_errdetail(json_error, lex));
+
+ freeJsonLexContext(lex);
+ destroyPQExpBuffer(testname);
+ destroyPQExpBuffer(raw_buf);
+}
+
+
static bool
escape_literal(PGconn *conn, PQExpBuffer target,
const char *unescaped, size_t unescaped_len,
@@ -451,8 +534,18 @@ static pe_test_vector pe_test_vectors[] =
* Testcases that are not null terminated for the specified input length.
* That's interesting to verify that escape functions don't read beyond
* the intended input length.
+ *
+ * One interesting special case is GB18030, which has the odd behaviour of
+ * needing to read beyond the first byte to determine the length of a
+ * multi-byte character.
*/
TV_LEN("gbk", "\x80", 1),
+ TV_LEN("GB18030", "\x80", 1),
+ TV_LEN("GB18030", "\x80\0", 2),
+ TV_LEN("GB18030", "\x80\x30", 2),
+ TV_LEN("GB18030", "\x80\x30\0", 3),
+ TV_LEN("GB18030", "\x80\x30\x30", 3),
+ TV_LEN("GB18030", "\x80\x30\x30\0", 4),
TV_LEN("UTF-8", "\xC3\xb6 ", 1),
TV_LEN("UTF-8", "\xC3\xb6 ", 2),
};
@@ -861,6 +954,9 @@ main(int argc, char *argv[])
exit(1);
}
+ test_gb18030_page_multiple(&tc);
+ test_gb18030_json(&tc);
+
for (int i = 0; i < lengthof(pe_test_vectors); i++)
{
test_one_vector(&tc, &pe_test_vectors[i]);
diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out
index d785f92561e..7dd1ef6161f 100644
--- a/src/test/regress/expected/conversion.out
+++ b/src/test/regress/expected/conversion.out
@@ -508,10 +508,13 @@ insert into gb18030_inputs values
('\x666f6f84309c38', 'valid, translates to UTF-8 by mapping function'),
('\x666f6f84309c', 'incomplete char '),
('\x666f6f84309c0a', 'incomplete char, followed by newline '),
+ ('\x666f6f84', 'incomplete char at end'),
('\x666f6f84309c3800', 'invalid, NUL byte'),
('\x666f6f84309c0038', 'invalid, NUL byte');
--- Test GB18030 verification
-select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+-- Test GB18030 verification. Round-trip through text so the backing of the
+-- bytea values is palloc, not shared_buffers. This lets Valgrind detect
+-- reads past the end.
+select description, inbytes, (test_conv(inbytes::text::bytea, 'gb18030', 'gb18030')).* from gb18030_inputs;
description | inbytes | result | errorat | error
------------------------------------------------+--------------------+------------------+--------------+-------------------------------------------------------------------
valid, pure ASCII | \x666f6f | \x666f6f | |
@@ -520,9 +523,10 @@ select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from g
valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6f84309c38 | |
incomplete char | \x666f6f84309c | \x666f6f | \x84309c | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
incomplete char, followed by newline | \x666f6f84309c0a | \x666f6f | \x84309c0a | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ incomplete char at end | \x666f6f84 | \x666f6f | \x84 | invalid byte sequence for encoding "GB18030": 0x84
invalid, NUL byte | \x666f6f84309c3800 | \x666f6f84309c38 | \x00 | invalid byte sequence for encoding "GB18030": 0x00
invalid, NUL byte | \x666f6f84309c0038 | \x666f6f | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
-(8 rows)
+(9 rows)
-- Test conversions from GB18030
select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;
@@ -534,9 +538,10 @@ select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18
valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6fefa8aa | |
incomplete char | \x666f6f84309c | \x666f6f | \x84309c | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
incomplete char, followed by newline | \x666f6f84309c0a | \x666f6f | \x84309c0a | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ incomplete char at end | \x666f6f84 | \x666f6f | \x84 | invalid byte sequence for encoding "GB18030": 0x84
invalid, NUL byte | \x666f6f84309c3800 | \x666f6fefa8aa | \x00 | invalid byte sequence for encoding "GB18030": 0x00
invalid, NUL byte | \x666f6f84309c0038 | \x666f6f | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
-(8 rows)
+(9 rows)
--
-- ISO-8859-5
diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql
index b567a1a5721..a80d62367a2 100644
--- a/src/test/regress/sql/conversion.sql
+++ b/src/test/regress/sql/conversion.sql
@@ -300,11 +300,14 @@ insert into gb18030_inputs values
('\x666f6f84309c38', 'valid, translates to UTF-8 by mapping function'),
('\x666f6f84309c', 'incomplete char '),
('\x666f6f84309c0a', 'incomplete char, followed by newline '),
+ ('\x666f6f84', 'incomplete char at end'),
('\x666f6f84309c3800', 'invalid, NUL byte'),
('\x666f6f84309c0038', 'invalid, NUL byte');
--- Test GB18030 verification
-select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+-- Test GB18030 verification. Round-trip through text so the backing of the
+-- bytea values is palloc, not shared_buffers. This lets Valgrind detect
+-- reads past the end.
+select description, inbytes, (test_conv(inbytes::text::bytea, 'gb18030', 'gb18030')).* from gb18030_inputs;
-- Test conversions from GB18030
select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;