Skip to content

Commit 17a6910

Browse files
Florents-TselaiCommitfest Bot
authored and
Commitfest Bot
committed
base64url support for encode/decode functions. Refactored and with better test cases
1 parent 45e7e8c commit 17a6910

File tree

4 files changed

+219
-0
lines changed

4 files changed

+219
-0
lines changed

doc/src/sgml/func.sgml

+18
Original file line numberDiff line numberDiff line change
@@ -4989,6 +4989,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
49894989
Encodes binary data into a textual representation; supported
49904990
<parameter>format</parameter> values are:
49914991
<link linkend="encode-format-base64"><literal>base64</literal></link>,
4992+
<link linkend="encode-format-base64url"><literal>base64url</literal></link>,
49924993
<link linkend="encode-format-escape"><literal>escape</literal></link>,
49934994
<link linkend="encode-format-hex"><literal>hex</literal></link>.
49944995
</para>
@@ -5046,6 +5047,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
50465047
</listitem>
50475048
</varlistentry>
50485049

5050+
<varlistentry id="encode-format-base64url">
5051+
<term>base64url
5052+
<indexterm>
5053+
<primary>base64url format</primary>
5054+
</indexterm></term>
5055+
<listitem>
5056+
<para>
5057+
The <literal>base64url</literal> format is the URL-safe variant of <literal>base64</literal> defined in
5058+
<ulink url="https://datatracker.ietf.org/doc/html/rfc4648#section-5">RFC 4648
5059+
Section 5</ulink>. Unlike standard <literal>base64</literal>, it replaces
5060+
<literal>'+'</literal> with <literal>'-'</literal> and <literal>'/'</literal> with <literal>'_'</literal>
5061+
to ensure safe usage in URLs and filenames. Additionally, the padding character
5062+
<literal>'='</literal> is omitted.
5063+
</para>
5064+
</listitem>
5065+
</varlistentry>
5066+
50495067
<varlistentry id="encode-format-escape">
50505068
<term>escape
50515069
<indexterm>

src/backend/utils/adt/encode.c

+126
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,126 @@ pg_base64_dec_len(const char *src, size_t srclen)
415415
return ((uint64) srclen * 3) >> 2;
416416
}
417417

418+
/*
 * pg_base64url_enc_len
 *		Upper bound on the number of bytes needed to hold the base64url
 *		encoding of srclen input bytes.
 *
 * "src" is not inspected; only the length matters.
 *
 * Every 3 input bytes become 4 output characters.  The final partial group
 * is counted as a full 4 characters even though base64url output carries no
 * '=' padding, so the result may exceed the real output length by up to two
 * bytes.  The estimate also includes the same line-feed allowance as the
 * standard base64 estimate (one byte per 76 output characters), so that the
 * buffer is large enough even when the encoder is built on top of the
 * line-wrapping base64 routine.
 *
 * The computation is done in uint64 so it cannot wrap on platforms where
 * size_t is 32 bits wide.
 */
static uint64
pg_base64url_enc_len(const char *src, size_t srclen)
{
	return ((uint64) srclen + 2) / 3 * 4 + (uint64) srclen / (76 * 3 / 4);
}
438+
439+
440+
/*
 * pg_base64url_dec_len
 *		Upper bound on the number of bytes produced by decoding srclen
 *		characters of base64url text.
 *
 * "src" is not inspected; only the length matters.
 *
 * base64url input carries no '=' padding, so the length is first rounded up
 * to the next multiple of 4; each group of 4 characters then yields at most
 * 3 bytes.  The arithmetic is done in uint64 so that it cannot wrap on
 * platforms where size_t is 32 bits wide.
 */
static uint64
pg_base64url_dec_len(const char *src, size_t srclen)
{
	uint64		groups = ((uint64) srclen + 3) / 4;

	return groups * 3;
}
451+
452+
/*
 * pg_base64url_encode
 *		Encode len bytes at src as base64url (RFC 4648 Section 5) into dst.
 *
 * The output uses the URL-safe alphabet ('-' and '_' instead of '+' and
 * '/'), carries no '=' padding and contains no line breaks.  At most four
 * characters are written per (possibly partial) group of three input bytes.
 * The output is not NUL-terminated.
 *
 * Returns the number of bytes written to dst.
 *
 * The encoding is done directly rather than by post-processing the output
 * of the standard base64 encoder: that encoder breaks its output into lines,
 * and a line feed is neither a valid base64url character nor accounted for
 * when trimming padding.
 */
static uint64
pg_base64url_encode(const char *src, size_t len, char *dst)
{
	static const char alphabet[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
	const unsigned char *in = (const unsigned char *) src;
	char	   *out = dst;
	size_t		remaining = len;
	unsigned int bits;

	/* Complete groups: 3 input bytes -> 4 output characters */
	while (remaining >= 3)
	{
		bits = ((unsigned int) in[0] << 16) |
			((unsigned int) in[1] << 8) |
			(unsigned int) in[2];

		*out++ = alphabet[(bits >> 18) & 0x3f];
		*out++ = alphabet[(bits >> 12) & 0x3f];
		*out++ = alphabet[(bits >> 6) & 0x3f];
		*out++ = alphabet[bits & 0x3f];

		in += 3;
		remaining -= 3;
	}

	/* Trailing partial group, emitted without '=' padding */
	if (remaining == 2)
	{
		bits = ((unsigned int) in[0] << 16) | ((unsigned int) in[1] << 8);

		*out++ = alphabet[(bits >> 18) & 0x3f];
		*out++ = alphabet[(bits >> 12) & 0x3f];
		*out++ = alphabet[(bits >> 6) & 0x3f];
	}
	else if (remaining == 1)
	{
		bits = (unsigned int) in[0] << 16;

		*out++ = alphabet[(bits >> 18) & 0x3f];
		*out++ = alphabet[(bits >> 12) & 0x3f];
	}

	return (uint64) (out - dst);
}
475+
476+
static uint64
477+
pg_base64url_decode(const char *src, size_t len, char *dst)
478+
{
479+
size_t i, pad_len, base64_len;
480+
uint64 decoded_len;
481+
char *base64;
482+
483+
/* Handle empty input specially */
484+
if (len == 0)
485+
return 0;
486+
487+
/* Calculate padding needed for standard base64 */
488+
pad_len = 0;
489+
if (len % 4 != 0)
490+
pad_len = 4 - (len % 4);
491+
492+
/* Allocate memory for converted string */
493+
base64_len = len + pad_len;
494+
base64 = palloc(base64_len + 1); /* +1 for null terminator */
495+
496+
/* Convert Base64URL to Base64 */
497+
for (i = 0; i < len; i++)
498+
{
499+
char c = src[i];
500+
if (c == '-')
501+
base64[i] = '+'; /* Convert '-' to '+' */
502+
else if (c == '_')
503+
base64[i] = '/'; /* Convert '_' to '/' */
504+
else if ((c >= 'A' && c <= 'Z') ||
505+
(c >= 'a' && c <= 'z') ||
506+
(c >= '0' && c <= '9'))
507+
base64[i] = c; /* Keep alphanumeric chars unchanged */
508+
else if (c == '=')
509+
ereport(ERROR,
510+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
511+
errmsg("invalid base64url input"),
512+
errhint("Base64URL encoding should not contain padding '='.")));
513+
else if (c == '+' || c == '/')
514+
ereport(ERROR,
515+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
516+
errmsg("invalid base64url character: '%c'", c),
517+
errhint("Base64URL should use '-' instead of '+' and '_' instead of '/'.")));
518+
else
519+
ereport(ERROR,
520+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
521+
errmsg("invalid base64url character: '%c'", c)));
522+
}
523+
524+
/* Add padding if necessary */
525+
for (i = 0; i < pad_len; i++)
526+
base64[len + i] = '=';
527+
528+
base64[base64_len] = '\0'; /* Null-terminate for safety */
529+
530+
/* Decode using the standard Base64 decoder */
531+
decoded_len = pg_base64_decode(base64, base64_len, dst);
532+
533+
/* Free allocated memory */
534+
pfree(base64);
535+
return decoded_len;
536+
}
537+
418538
/*
419539
* Escape
420540
* Minimally escape bytea to text.
@@ -606,6 +726,12 @@ static const struct
606726
pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode
607727
}
608728
},
729+
{
730+
"base64url",
731+
{
732+
pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode
733+
}
734+
},
609735
{
610736
"escape",
611737
{

src/test/regress/expected/strings.out

+57
Original file line numberDiff line numberDiff line change
@@ -2395,6 +2395,63 @@ SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
23952395
\x1234567890abcdef00
23962396
(1 row)
23972397

2398+
--
2399+
-- encode/decode Base64URL
2400+
--
2401+
SET bytea_output TO hex;
2402+
-- Flaghsip Test case against base64.
2403+
-- Notice the = padding removed at the end and special chars.
2404+
SELECT encode('\x69b73eff', 'base64'); -- Expected: abc+/w==
2405+
encode
2406+
----------
2407+
abc+/w==
2408+
(1 row)
2409+
2410+
SELECT encode('\x69b73eff', 'base64url'); -- Expected: abc-_w
2411+
encode
2412+
--------
2413+
abc-_w
2414+
(1 row)
2415+
2416+
SELECT decode(encode('\x69b73eff', 'base64url'), 'base64url');
2417+
decode
2418+
------------
2419+
\x69b73eff
2420+
(1 row)
2421+
2422+
-- Test basic encoding/decoding
2423+
SELECT encode('\x1234567890abcdef00', 'base64url'); -- Expected: EjRWeJCrze8A
2424+
encode
2425+
--------------
2426+
EjRWeJCrze8A
2427+
(1 row)
2428+
2429+
SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- Expected: \x1234567890abcdef00
2430+
decode
2431+
----------------------
2432+
\x1234567890abcdef00
2433+
(1 row)
2434+
2435+
-- Test with empty input
2436+
SELECT encode('', 'base64url');
2437+
encode
2438+
--------
2439+
2440+
(1 row)
2441+
2442+
SELECT decode('', 'base64url');
2443+
decode
2444+
--------
2445+
\x
2446+
(1 row)
2447+
2448+
-- Test round-trip conversion
2449+
SELECT encode(decode('SGVsbG8gV29ybGQh', 'base64url'), 'base64url'); -- Expected: SGVsbG8gV29ybGQh (decodes to "Hello World!")
2450+
encode
2451+
------------------
2452+
SGVsbG8gV29ybGQh
2453+
(1 row)
2454+
23982455
--
23992456
-- get_bit/set_bit etc
24002457
--

src/test/regress/sql/strings.sql

+18
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,24 @@ SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea,
754754
SELECT encode('\x1234567890abcdef00', 'escape');
755755
SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
756756

757+
--
758+
-- encode/decode Base64URL
759+
--
760+
SET bytea_output TO hex;
761+
-- Flaghsip Test case against base64.
762+
-- Notice the = padding removed at the end and special chars.
763+
SELECT encode('\x69b73eff', 'base64'); -- Expected: abc+/w==
764+
SELECT encode('\x69b73eff', 'base64url'); -- Expected: abc-_w
765+
SELECT decode(encode('\x69b73eff', 'base64url'), 'base64url');
766+
-- Test basic encoding/decoding
767+
SELECT encode('\x1234567890abcdef00', 'base64url'); -- Expected: EjRWeJCrze8A
768+
SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url'); -- Expected: \x1234567890abcdef00
769+
-- Test with empty input
770+
SELECT encode('', 'base64url');
771+
SELECT decode('', 'base64url');
772+
-- Test round-trip conversion
773+
SELECT encode(decode('SGVsbG8gV29ybGQh', 'base64url'), 'base64url'); -- Expected: SGVsbG8gV29ybGQh (decodes to "Hello World!")
774+
757775
--
758776
-- get_bit/set_bit etc
759777
--

0 commit comments

Comments
 (0)