Convert a few more datatype input functions to report errors softly.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 15 Dec 2022 00:42:05 +0000 (19:42 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 15 Dec 2022 00:42:05 +0000 (19:42 -0500)
Convert the remaining string-category input functions
(bpcharin, varcharin, byteain) to the new style.

Discussion: https://postgr.es/m/3038346.1671060258@sss.pgh.pa.us

14 files changed:
src/backend/utils/adt/encode.c
src/backend/utils/adt/varchar.c
src/backend/utils/adt/varlena.c
src/include/utils/builtins.h
src/test/regress/expected/char.out
src/test/regress/expected/char_1.out
src/test/regress/expected/char_2.out
src/test/regress/expected/strings.out
src/test/regress/expected/varchar.out
src/test/regress/expected/varchar_1.out
src/test/regress/expected/varchar_2.out
src/test/regress/sql/char.sql
src/test/regress/sql/strings.sql
src/test/regress/sql/varchar.sql

index feb3e830e4fd83011302403d7f453b963daacdfb..f3bb5cca43cef2a426e89d001532f7492ccfe84b 100644 (file)
@@ -171,8 +171,8 @@ hex_encode(const char *src, size_t len, char *dst)
        return (uint64) len * 2;
 }
 
-static inline char
-get_hex(const char *cp)
+static inline bool
+get_hex(const char *cp, char *out)
 {
        unsigned char c = (unsigned char) *cp;
        int                     res = -1;
@@ -180,17 +180,19 @@ get_hex(const char *cp)
        if (c < 127)
                res = hexlookup[c];
 
-       if (res < 0)
-               ereport(ERROR,
-                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                errmsg("invalid hexadecimal digit: \"%.*s\"",
-                                               pg_mblen(cp), cp)));
+       *out = (char) res;
 
-       return (char) res;
+       return (res >= 0);
 }
 
 uint64
 hex_decode(const char *src, size_t len, char *dst)
+{
+       return hex_decode_safe(src, len, dst, NULL);
+}
+
+uint64
+hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
 {
        const char *s,
                           *srcend;
@@ -208,16 +210,23 @@ hex_decode(const char *src, size_t len, char *dst)
                        s++;
                        continue;
                }
-               v1 = get_hex(s) << 4;
+               if (!get_hex(s, &v1))
+                       ereturn(escontext, 0,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("invalid hexadecimal digit: \"%.*s\"",
+                                                       pg_mblen(s), s)));
                s++;
                if (s >= srcend)
-                       ereport(ERROR,
+                       ereturn(escontext, 0,
                                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                         errmsg("invalid hexadecimal data: odd number of digits")));
-
-               v2 = get_hex(s);
+               if (!get_hex(s, &v2))
+                       ereturn(escontext, 0,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("invalid hexadecimal digit: \"%.*s\"",
+                                                       pg_mblen(s), s)));
                s++;
-               *p++ = v1 | v2;
+               *p++ = (v1 << 4) | v2;
        }
 
        return p - dst;
index a63c498181e1895b71f65e79aaa19b484653f4c9..01a2db6b23b77469a15965e990259f3f9eb5bebb 100644 (file)
@@ -122,9 +122,13 @@ anychar_typmodout(int32 typmod)
  *
  * If the input string is too long, raise an error, unless the extra
  * characters are spaces, in which case they're truncated.  (per SQL)
+ *
+ * If escontext points to an ErrorSaveContext node, that is filled instead
+ * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
+ * to detect errors.
  */
 static BpChar *
-bpchar_input(const char *s, size_t len, int32 atttypmod)
+bpchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
 {
        BpChar     *result;
        char       *r;
@@ -153,7 +157,7 @@ bpchar_input(const char *s, size_t len, int32 atttypmod)
                        for (j = mbmaxlen; j < len; j++)
                        {
                                if (s[j] != ' ')
-                                       ereport(ERROR,
+                                       ereturn(escontext, NULL,
                                                        (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
                                                         errmsg("value too long for type character(%d)",
                                                                        (int) maxlen)));
@@ -195,14 +199,13 @@ Datum
 bpcharin(PG_FUNCTION_ARGS)
 {
        char       *s = PG_GETARG_CSTRING(0);
-
 #ifdef NOT_USED
        Oid                     typelem = PG_GETARG_OID(1);
 #endif
        int32           atttypmod = PG_GETARG_INT32(2);
        BpChar     *result;
 
-       result = bpchar_input(s, strlen(s), atttypmod);
+       result = bpchar_input(s, strlen(s), atttypmod, fcinfo->context);
        PG_RETURN_BPCHAR_P(result);
 }
 
@@ -228,7 +231,6 @@ Datum
 bpcharrecv(PG_FUNCTION_ARGS)
 {
        StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
-
 #ifdef NOT_USED
        Oid                     typelem = PG_GETARG_OID(1);
 #endif
@@ -238,7 +240,7 @@ bpcharrecv(PG_FUNCTION_ARGS)
        int                     nbytes;
 
        str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
-       result = bpchar_input(str, nbytes, atttypmod);
+       result = bpchar_input(str, nbytes, atttypmod, NULL);
        pfree(str);
        PG_RETURN_BPCHAR_P(result);
 }
@@ -448,11 +450,12 @@ bpchartypmodout(PG_FUNCTION_ARGS)
  * If the input string is too long, raise an error, unless the extra
  * characters are spaces, in which case they're truncated.  (per SQL)
  *
- * Uses the C string to text conversion function, which is only appropriate
- * if VarChar and text are equivalent types.
+ * If escontext points to an ErrorSaveContext node, that is filled instead
+ * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
+ * to detect errors.
  */
 static VarChar *
-varchar_input(const char *s, size_t len, int32 atttypmod)
+varchar_input(const char *s, size_t len, int32 atttypmod, Node *escontext)
 {
        VarChar    *result;
        size_t          maxlen;
@@ -468,7 +471,7 @@ varchar_input(const char *s, size_t len, int32 atttypmod)
                for (j = mbmaxlen; j < len; j++)
                {
                        if (s[j] != ' ')
-                               ereport(ERROR,
+                               ereturn(escontext, NULL,
                                                (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
                                                 errmsg("value too long for type character varying(%d)",
                                                                (int) maxlen)));
@@ -477,6 +480,10 @@ varchar_input(const char *s, size_t len, int32 atttypmod)
                len = mbmaxlen;
        }
 
+       /*
+        * We can use cstring_to_text_with_len because VarChar and text are
+        * binary-compatible types.
+        */
        result = (VarChar *) cstring_to_text_with_len(s, len);
        return result;
 }
@@ -489,14 +496,13 @@ Datum
 varcharin(PG_FUNCTION_ARGS)
 {
        char       *s = PG_GETARG_CSTRING(0);
-
 #ifdef NOT_USED
        Oid                     typelem = PG_GETARG_OID(1);
 #endif
        int32           atttypmod = PG_GETARG_INT32(2);
        VarChar    *result;
 
-       result = varchar_input(s, strlen(s), atttypmod);
+       result = varchar_input(s, strlen(s), atttypmod, fcinfo->context);
        PG_RETURN_VARCHAR_P(result);
 }
 
@@ -522,7 +528,6 @@ Datum
 varcharrecv(PG_FUNCTION_ARGS)
 {
        StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
-
 #ifdef NOT_USED
        Oid                     typelem = PG_GETARG_OID(1);
 #endif
@@ -532,7 +537,7 @@ varcharrecv(PG_FUNCTION_ARGS)
        int                     nbytes;
 
        str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
-       result = varchar_input(str, nbytes, atttypmod);
+       result = varchar_input(str, nbytes, atttypmod, NULL);
        pfree(str);
        PG_RETURN_VARCHAR_P(result);
 }
index c5e7ee7ca2d3073c067928cfa35c1e746218bb64..1c52deec556aab2f8a9f3e3aafc73219c08a36d8 100644 (file)
@@ -295,6 +295,7 @@ Datum
 byteain(PG_FUNCTION_ARGS)
 {
        char       *inputText = PG_GETARG_CSTRING(0);
+       Node       *escontext = fcinfo->context;
        char       *tp;
        char       *rp;
        int                     bc;
@@ -307,7 +308,8 @@ byteain(PG_FUNCTION_ARGS)
 
                bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
                result = palloc(bc);
-               bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
+               bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
+                                                        escontext);
                SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
 
                PG_RETURN_BYTEA_P(result);
@@ -331,7 +333,7 @@ byteain(PG_FUNCTION_ARGS)
                        /*
                         * one backslash, not followed by another or ### valid octal
                         */
-                       ereport(ERROR,
+                       ereturn(escontext, (Datum) 0,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                         errmsg("invalid input syntax for type %s", "bytea")));
                }
@@ -372,7 +374,7 @@ byteain(PG_FUNCTION_ARGS)
                        /*
                         * We should never get here. The first pass should not allow it.
                         */
-                       ereport(ERROR,
+                       ereturn(escontext, (Datum) 0,
                                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                         errmsg("invalid input syntax for type %s", "bytea")));
                }
index 10d13b0f1e97e24f4964c0a927f85caf0faa53c4..15373ba68f75f449a076ad8d674db25baeb20134 100644 (file)
@@ -34,6 +34,8 @@ extern int    errdomainconstraint(Oid datatypeOid, const char *conname);
 /* encode.c */
 extern uint64 hex_encode(const char *src, size_t len, char *dst);
 extern uint64 hex_decode(const char *src, size_t len, char *dst);
+extern uint64 hex_decode_safe(const char *src, size_t len, char *dst,
+                                                         Node *escontext);
 
 /* int.c */
 extern int2vector *buildint2vector(const int16 *int2s, int n);
index ea9b0b8eeb3ffe4c9464bafe853e6d1e4fb6c478..199001b2fede8fe08bf1da51cdf4613d5f12f610 100644 (file)
@@ -119,6 +119,25 @@ SELECT * FROM CHAR_TBL;
  abcd
 (4 rows)
 
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('abcd  ', 'char(4)');
+ pg_input_is_valid 
+-------------------
+ t
+(1 row)
+
+SELECT pg_input_is_valid('abcde', 'char(4)');
+ pg_input_is_valid 
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message('abcde', 'char(4)');
+        pg_input_error_message        
+--------------------------------------
+ value too long for type character(4)
+(1 row)
+
 --
 -- Also test "char", which is an ad-hoc one-byte type.  It can only
 -- really store ASCII characters, but we allow high-bit-set characters
index ffd31551de58df53ebfad3a6f70ed92e63f13c04..3dcb0daa0d66268d3ba8527b9228bb7bb473e833 100644 (file)
@@ -119,6 +119,25 @@ SELECT * FROM CHAR_TBL;
  abcd
 (4 rows)
 
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('abcd  ', 'char(4)');
+ pg_input_is_valid 
+-------------------
+ t
+(1 row)
+
+SELECT pg_input_is_valid('abcde', 'char(4)');
+ pg_input_is_valid 
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message('abcde', 'char(4)');
+        pg_input_error_message        
+--------------------------------------
+ value too long for type character(4)
+(1 row)
+
 --
 -- Also test "char", which is an ad-hoc one-byte type.  It can only
 -- really store ASCII characters, but we allow high-bit-set characters
index 56818f824b5f8d27a6dfa613b9ae9b71a3a64ec3..dd5d34fe8da50e54643b972e41eb805a8be9422f 100644 (file)
@@ -119,6 +119,25 @@ SELECT * FROM CHAR_TBL;
  abcd
 (4 rows)
 
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('abcd  ', 'char(4)');
+ pg_input_is_valid 
+-------------------
+ t
+(1 row)
+
+SELECT pg_input_is_valid('abcde', 'char(4)');
+ pg_input_is_valid 
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message('abcde', 'char(4)');
+        pg_input_error_message        
+--------------------------------------
+ value too long for type character(4)
+(1 row)
+
 --
 -- Also test "char", which is an ad-hoc one-byte type.  It can only
 -- really store ASCII characters, but we allow high-bit-set characters
index 69d7ed4ef1cfddb0a29888ed267232e7c881e1d1..f028c1f10f2009c0b57da6e4e87a5fe7dc432daa 100644 (file)
@@ -273,6 +273,31 @@ SELECT E'De\\123dBeEf'::bytea;
  DeSdBeEf
 (1 row)
 
+-- Test non-error-throwing API too
+SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');
+ pg_input_is_valid 
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message(E'\\xDeAdBeE', 'bytea');
+             pg_input_error_message             
+------------------------------------------------
+ invalid hexadecimal data: odd number of digits
+(1 row)
+
+SELECT pg_input_error_message(E'\\xDeAdBeEx', 'bytea');
+     pg_input_error_message     
+--------------------------------
+ invalid hexadecimal digit: "x"
+(1 row)
+
+SELECT pg_input_error_message(E'foo\\99bar', 'bytea');
+       pg_input_error_message        
+-------------------------------------
+ invalid input syntax for type bytea
+(1 row)
+
 --
 -- test conversions between various string types
 -- E021-10 implicit casting among the character data types
index f1a8202d9f9f2702e8c94fae95f5d779f2c62154..62b683d86ff50104b5c6395c7c03b9be08074036 100644 (file)
@@ -111,3 +111,22 @@ SELECT * FROM VARCHAR_TBL;
  abcd
 (4 rows)
 
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('abcd  ', 'varchar(4)');
+ pg_input_is_valid 
+-------------------
+ t
+(1 row)
+
+SELECT pg_input_is_valid('abcde', 'varchar(4)');
+ pg_input_is_valid 
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message('abcde', 'varchar(4)');
+            pg_input_error_message            
+----------------------------------------------
+ value too long for type character varying(4)
+(1 row)
+
index 6f01ef969ec100c8c0cba80f18406b68c5b18b51..6690f81c0b80ad05238bf0ea9315204cf21a9751 100644 (file)
@@ -111,3 +111,22 @@ SELECT * FROM VARCHAR_TBL;
  abcd
 (4 rows)
 
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('abcd  ', 'varchar(4)');
+ pg_input_is_valid 
+-------------------
+ t
+(1 row)
+
+SELECT pg_input_is_valid('abcde', 'varchar(4)');
+ pg_input_is_valid 
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message('abcde', 'varchar(4)');
+            pg_input_error_message            
+----------------------------------------------
+ value too long for type character varying(4)
+(1 row)
+
index 72e57050ea4786af8c4aab30bf383a0bf93606bf..ad8aa7c6933903488be6dbc75f7f7d9039ff8c7e 100644 (file)
@@ -111,3 +111,22 @@ SELECT * FROM VARCHAR_TBL;
  abcd
 (4 rows)
 
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('abcd  ', 'varchar(4)');
+ pg_input_is_valid 
+-------------------
+ t
+(1 row)
+
+SELECT pg_input_is_valid('abcde', 'varchar(4)');
+ pg_input_is_valid 
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message('abcde', 'varchar(4)');
+            pg_input_error_message            
+----------------------------------------------
+ value too long for type character varying(4)
+(1 row)
+
index 120fed53e5c39d572ff0fe66bba2a90d5ce32d92..8aa43b0fb8ec3f4208cb38add32c775b4715bc37 100644 (file)
@@ -72,6 +72,11 @@ INSERT INTO CHAR_TBL (f1) VALUES ('abcde');
 
 SELECT * FROM CHAR_TBL;
 
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('abcd  ', 'char(4)');
+SELECT pg_input_is_valid('abcde', 'char(4)');
+SELECT pg_input_error_message('abcde', 'char(4)');
+
 --
 -- Also test "char", which is an ad-hoc one-byte type.  It can only
 -- really store ASCII characters, but we allow high-bit-set characters
index 04109f599dda86d7324cd7d45deeef6372a62d6b..932f71cbca467788e10316895e00dd8ee88a453b 100644 (file)
@@ -85,6 +85,12 @@ SELECT E'DeAdBeEf'::bytea;
 SELECT E'De\\000dBeEf'::bytea;
 SELECT E'De\\123dBeEf'::bytea;
 
+-- Test non-error-throwing API too
+SELECT pg_input_is_valid(E'\\xDeAdBeE', 'bytea');
+SELECT pg_input_error_message(E'\\xDeAdBeE', 'bytea');
+SELECT pg_input_error_message(E'\\xDeAdBeEx', 'bytea');
+SELECT pg_input_error_message(E'foo\\99bar', 'bytea');
+
 --
 -- test conversions between various string types
 -- E021-10 implicit casting among the character data types
index a97082142621981ade9f38cf03d3aa905e334603..df16da37a7367909b1f2182416ee9d56bf83bb0d 100644 (file)
@@ -66,3 +66,8 @@ DROP TABLE VARCHAR_TBL;
 INSERT INTO VARCHAR_TBL (f1) VALUES ('abcde');
 
 SELECT * FROM VARCHAR_TBL;
+
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('abcd  ', 'varchar(4)');
+SELECT pg_input_is_valid('abcde', 'varchar(4)');
+SELECT pg_input_error_message('abcde', 'varchar(4)');