From e97c227d45e162c09130622ad51e1d1aced06134 Mon Sep 17 00:00:00 2001 From: Florents Tselai Date: Fri, 14 Mar 2025 20:51:33 +0200 Subject: [PATCH] base64url support for encode/decode functions. Refactored and with better test cases --- doc/src/sgml/func.sgml | 18 ++++ src/backend/utils/adt/encode.c | 126 ++++++++++++++++++++++++++ src/test/regress/expected/strings.out | 57 ++++++++++++ src/test/regress/sql/strings.sql | 18 ++++ 4 files changed, 219 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index af3d056b9923..e35b51c002e0 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -4989,6 +4989,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); Encodes binary data into a textual representation; supported format values are: base64, + base64url, escape, hex. @@ -5046,6 +5047,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); + + base64url + + base64url format + + + + The base64url format is the URL-safe variant of base64 defined in + RFC 4648 + Section 5. Unlike standard base64, it replaces + '+' with '-' and '/' with '_' + to ensure safe usage in URLs and filenames. Additionally, the padding character + '=' is omitted. 
+ + + + escape
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4ccaed815d17..9522eecd4bef 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -415,6 +415,161 @@ pg_base64_dec_len(const char *src, size_t srclen)
 	return ((uint64) srclen * 3) >> 2;
 }
 
+/*
+ * Base64URL: the URL- and filename-safe base64 variant of RFC 4648 Section 5.
+ *
+ * The alphabet uses '-' and '_' in place of '+' and '/', and the output
+ * carries neither '=' padding nor line breaks.  Both directions reuse the
+ * standard base64 routines and translate between the two alphabets.
+ */
+
+/*
+ * Upper bound on the number of bytes pg_base64url_encode() writes to dst.
+ *
+ * The encoder runs pg_base64_encode() directly in the output buffer, and
+ * that routine emits '=' padding plus a newline after every 76 characters,
+ * so the buffer has to be sized for the standard base64 form even though
+ * the final base64url text is shorter.
+ */
+static uint64
+pg_base64url_enc_len(const char *src, size_t srclen)
+{
+	return pg_base64_enc_len(src, srclen);
+}
+
+/*
+ * Upper bound on the decoded size of srclen base64url characters.
+ *
+ * The decoder pads its input up to a multiple of 4 characters, and every
+ * such group yields at most 3 bytes.  Computed in uint64 so the arithmetic
+ * cannot wrap where size_t is 32 bits wide.
+ */
+static uint64
+pg_base64url_dec_len(const char *src, size_t srclen)
+{
+	return ((uint64) srclen + 3) / 4 * 3;
+}
+
+/*
+ * Encode len bytes of src as base64url into dst; returns the output length.
+ * dst must have room for pg_base64url_enc_len() bytes.
+ */
+static uint64
+pg_base64url_encode(const char *src, size_t len, char *dst)
+{
+	uint64		b64_len;
+	uint64		out_len = 0;
+
+	if (len == 0)
+		return 0;
+
+	b64_len = pg_base64_encode(src, len, dst);
+
+	/*
+	 * Rewrite the standard base64 text in place.  The write position never
+	 * passes the read position, so a single forward pass is safe.
+	 */
+	for (uint64 i = 0; i < b64_len; i++)
+	{
+		char		c = dst[i];
+
+		/* drop the line breaks and padding added by pg_base64_encode() */
+		if (c == '\n' || c == '=')
+			continue;
+
+		if (c == '+')
+			c = '-';
+		else if (c == '/')
+			c = '_';
+
+		dst[out_len++] = c;
+	}
+
+	return out_len;
+}
+
+/*
+ * Decode len characters of base64url text from src into dst; returns the
+ * number of bytes produced.  dst must have room for pg_base64url_dec_len()
+ * bytes.
+ *
+ * The input is validated, mapped back to the standard alphabet, re-padded
+ * with '=' and handed to pg_base64_decode().  Padding, whitespace and the
+ * standard-only characters '+' and '/' are rejected with an error.
+ */
+static uint64
+pg_base64url_decode(const char *src, size_t len, char *dst)
+{
+	size_t		pad_len;
+	size_t		b64_len;
+	uint64		decoded_len;
+	char	   *b64;
+
+	if (len == 0)
+		return 0;
+
+	/*
+	 * One character past a multiple of 4 carries only 6 bits, less than a
+	 * byte, so no unpadded encoding has that length.  Say so directly
+	 * instead of synthesizing "===" that the caller never wrote.
+	 */
+	if (len % 4 == 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid base64url input"),
+				 errhint("Input length is not valid for unpadded Base64URL data.")));
+
+	/* number of '=' needed to reach a multiple of 4 characters */
+	pad_len = (4 - len % 4) % 4;
+	b64_len = len + pad_len;
+	b64 = palloc(b64_len + 1);	/* +1 for null terminator */
+
+	/*
+	 * Translate to the standard alphabet, rejecting anything else.
+	 *
+	 * NOTE(review): '%c' prints a single byte, so a multibyte character in
+	 * the input shows up truncated in the message; consider printing the
+	 * whole character instead.
+	 */
+	for (size_t i = 0; i < len; i++)
+	{
+		char		c = src[i];
+
+		if (c == '-')
+			b64[i] = '+';
+		else if (c == '_')
+			b64[i] = '/';
+		else if ((c >= 'A' && c <= 'Z') ||
+				 (c >= 'a' && c <= 'z') ||
+				 (c >= '0' && c <= '9'))
+			b64[i] = c;
+		else if (c == '=')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid base64url input"),
+					 errhint("Base64URL encoding should not contain padding '='.")));
+		else if (c == '+' || c == '/')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid base64url character: '%c'", c),
+					 errhint("Base64URL should use '-' instead of '+' and '_' instead of '/'.")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid base64url character: '%c'", c)));
+	}
+
+	/* restore the padding the standard decoder expects */
+	for (size_t i = 0; i < pad_len; i++)
+		b64[len + i] = '=';
+	b64[b64_len] = '\0';
+
+	decoded_len = pg_base64_decode(b64, b64_len, dst);
+	pfree(b64);
+
+	return decoded_len;
+}
+
 /*
  * Escape
  * Minimally escape bytea to text.
@@ -606,6 +726,12 @@ static const struct pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode } }, + { + "base64url", + { + pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode + } + }, { "escape", { diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 174f0a68331b..9536f826b821 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -2395,6 +2395,63 @@ SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape'); \x1234567890abcdef00 (1 row) +-- +-- encode/decode Base64URL +-- +SET bytea_output TO hex; +-- Flagship Test case against base64. +-- Notice the = padding removed at the end and special chars. +SELECT encode('\x69b73eff', 'base64'); -- Expected: abc+/w== + encode +---------- + abc+/w== +(1 row) + +SELECT encode('\x69b73eff', 'base64url'); -- Expected: abc-_w + encode +-------- + abc-_w +(1 row) + +SELECT decode(encode('\x69b73eff', 'base64url'), 'base64url'); + decode +------------ + \x69b73eff +(1 row) + +-- Test basic encoding/decoding +SELECT encode('\x1234567890abcdef00', 'base64url'); -- Expected: EjRWeJCrze8A + encode +-------------- + EjRWeJCrze8A +(1 row) + +SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- Expected: \x1234567890abcdef00 + decode +---------------------- + \x1234567890abcdef00 +(1 row) + +-- Test with empty input +SELECT encode('', 'base64url'); + encode +-------- + +(1 row) + +SELECT decode('', 'base64url'); + decode +-------- + \x +(1 row) + +-- Test round-trip conversion +SELECT encode(decode('SGVsbG8gV29ybGQh', 'base64url'), 'base64url'); -- Expected: SGVsbG8gV29ybGQh (decodes to "Hello World!") + encode +------------------ + SGVsbG8gV29ybGQh +(1 row) + -- -- get_bit/set_bit etc -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index f7b325baadf4..640eb67098e0 100644 --- a/src/test/regress/sql/strings.sql +++ 
b/src/test/regress/sql/strings.sql @@ -754,6 +754,24 @@ SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea, SELECT encode('\x1234567890abcdef00', 'escape'); SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape'); +-- +-- encode/decode Base64URL +-- +SET bytea_output TO hex; +-- Flagship Test case against base64. +-- Notice the = padding removed at the end and special chars. +SELECT encode('\x69b73eff', 'base64'); -- Expected: abc+/w== +SELECT encode('\x69b73eff', 'base64url'); -- Expected: abc-_w +SELECT decode(encode('\x69b73eff', 'base64url'), 'base64url'); +-- Test basic encoding/decoding +SELECT encode('\x1234567890abcdef00', 'base64url'); -- Expected: EjRWeJCrze8A +SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- Expected: \x1234567890abcdef00 +-- Test with empty input +SELECT encode('', 'base64url'); +SELECT decode('', 'base64url'); +-- Test round-trip conversion +SELECT encode(decode('SGVsbG8gV29ybGQh', 'base64url'), 'base64url'); -- Expected: SGVsbG8gV29ybGQh (decodes to "Hello World!") + -- -- get_bit/set_bit etc --