summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTom Lane2025-07-18 20:42:02 +0000
committerTom Lane2025-07-18 20:42:10 +0000
commit3683af617044d271ab7486d43d06f9689ed4961d (patch)
tree64dbe2ef951553f0ff18e84a52f4c56bcdac5ded /src
parent84409ed640568d8ccaaf1df1a41fb02f37d026ed (diff)
Speed up byteain by not parsing traditional-style input twice.
Instead of laboriously computing the exact output length, use strlen to get an upper bound cheaply. (This is still O(N) of course, but the constant factor is a lot less.) This will typically result in overallocating the output datum, but that's of little concern since it's a short-lived allocation in just about all use-cases. A simple microbenchmark showed about 40% speedup for long input strings. While here, make some cosmetic cleanups and add a test case that covers the double-backslash code path in byteain and byteaout. Author: Steven Niu <niushiji@gmail.com> Reviewed-by: Kirill Reshke <reshkekirill@gmail.com> Reviewed-by: Stepan Neretin <slpmcf@gmail.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/ca315729-140b-426e-81a6-6cd5cfe7ecc5@gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/utils/adt/bytea.c61
-rw-r--r--src/test/regress/expected/strings.out12
-rw-r--r--src/test/regress/sql/strings.sql2
3 files changed, 30 insertions, 45 deletions
diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c
index 2e539c2504e..6e7b914c563 100644
--- a/src/backend/utils/adt/bytea.c
+++ b/src/backend/utils/adt/bytea.c
@@ -182,27 +182,21 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
*
* Non-printable characters must be passed as '\nnn' (octal) and are
* converted to internal form. '\' must be passed as '\\'.
- * ereport(ERROR, ...) if bad form.
- *
- * BUGS:
- * The input is scanned twice.
- * The error checking of input is minimal.
*/
Datum
byteain(PG_FUNCTION_ARGS)
{
char *inputText = PG_GETARG_CSTRING(0);
Node *escontext = fcinfo->context;
+ size_t len = strlen(inputText);
+ size_t bc;
char *tp;
char *rp;
- int bc;
bytea *result;
/* Recognize hex input */
if (inputText[0] == '\\' && inputText[1] == 'x')
{
- size_t len = strlen(inputText);
-
bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
result = palloc(bc);
bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
@@ -213,33 +207,7 @@ byteain(PG_FUNCTION_ARGS)
}
/* Else, it's the traditional escaped style */
- for (bc = 0, tp = inputText; *tp != '\0'; bc++)
- {
- if (tp[0] != '\\')
- tp++;
- else if ((tp[0] == '\\') &&
- (tp[1] >= '0' && tp[1] <= '3') &&
- (tp[2] >= '0' && tp[2] <= '7') &&
- (tp[3] >= '0' && tp[3] <= '7'))
- tp += 4;
- else if ((tp[0] == '\\') &&
- (tp[1] == '\\'))
- tp += 2;
- else
- {
- /*
- * one backslash, not followed by another or ### valid octal
- */
- ereturn(escontext, (Datum) 0,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s", "bytea")));
- }
- }
-
- bc += VARHDRSZ;
-
- result = (bytea *) palloc(bc);
- SET_VARSIZE(result, bc);
+ result = (bytea *) palloc(len + VARHDRSZ); /* maximum possible length */
tp = inputText;
rp = VARDATA(result);
@@ -247,21 +215,21 @@ byteain(PG_FUNCTION_ARGS)
{
if (tp[0] != '\\')
*rp++ = *tp++;
- else if ((tp[0] == '\\') &&
- (tp[1] >= '0' && tp[1] <= '3') &&
+ else if ((tp[1] >= '0' && tp[1] <= '3') &&
(tp[2] >= '0' && tp[2] <= '7') &&
(tp[3] >= '0' && tp[3] <= '7'))
{
- bc = VAL(tp[1]);
- bc <<= 3;
- bc += VAL(tp[2]);
- bc <<= 3;
- *rp++ = bc + VAL(tp[3]);
+ int v;
+
+ v = VAL(tp[1]);
+ v <<= 3;
+ v += VAL(tp[2]);
+ v <<= 3;
+ *rp++ = v + VAL(tp[3]);
tp += 4;
}
- else if ((tp[0] == '\\') &&
- (tp[1] == '\\'))
+ else if (tp[1] == '\\')
{
*rp++ = '\\';
tp += 2;
@@ -269,7 +237,7 @@ byteain(PG_FUNCTION_ARGS)
else
{
/*
- * We should never get here. The first pass should not allow it.
+ * one backslash, not followed by another or ### valid octal
*/
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
@@ -277,6 +245,9 @@ byteain(PG_FUNCTION_ARGS)
}
}
+ bc = rp - VARDATA(result); /* actual length */
+ SET_VARSIZE(result, bc + VARHDRSZ);
+
PG_RETURN_BYTEA_P(result);
}
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 788844abd20..1bfd33de3f3 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -236,6 +236,12 @@ SELECT E'De\\678dBeEf'::bytea;
ERROR: invalid input syntax for type bytea
LINE 1: SELECT E'De\\678dBeEf'::bytea;
^
+SELECT E'DeAd\\\\BeEf'::bytea;
+ bytea
+----------------------
+ \x446541645c42654566
+(1 row)
+
SELECT reverse(''::bytea);
reverse
---------
@@ -291,6 +297,12 @@ SELECT E'De\\123dBeEf'::bytea;
DeSdBeEf
(1 row)
+SELECT E'DeAd\\\\BeEf'::bytea;
+ bytea
+------------
+ DeAd\\BeEf
+(1 row)
+
-- Test non-error-throwing API too
SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');
pg_input_is_valid
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 2577a42987d..92c445c2439 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -76,6 +76,7 @@ SELECT E'De\\000dBeEf'::bytea;
SELECT E'De\123dBeEf'::bytea;
SELECT E'De\\123dBeEf'::bytea;
SELECT E'De\\678dBeEf'::bytea;
+SELECT E'DeAd\\\\BeEf'::bytea;
SELECT reverse(''::bytea);
SELECT reverse('\xaa'::bytea);
@@ -88,6 +89,7 @@ SELECT E'\\xDe00BeEf'::bytea;
SELECT E'DeAdBeEf'::bytea;
SELECT E'De\\000dBeEf'::bytea;
SELECT E'De\\123dBeEf'::bytea;
+SELECT E'DeAd\\\\BeEf'::bytea;
-- Test non-error-throwing API too
SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');