Non-decimal integer literals

Add support for hexadecimal, octal, and binary integer literals: 0x42F 0o273 0b100101 per SQL:202x draft. This adds support in the lexer as well as in the integer type input functions. Reviewed-by: John Naylor <john.naylor@enterprisedb.com> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Reviewed-by: David Rowley <dgrowleyml@gmail.com> Reviewed-by: Dean Rasheed <dean.a.rasheed@gmail.com> Discussion: https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb97d@enterprisedb.com
author: Peter Eisentraut 2022-12-14 04:40:38 +0000
committer: Peter Eisentraut 2022-12-14 05:17:07 +0000
commit: 6fcda9aba83449082124825b6d375c0a61e21c42 (patch)
tree: d2e23f5322bf6879e0ee328593fbc7b3f6f71702 /src/backend
parent: 60684dd834a222fefedd49b19d1f0a6189c1632e (diff)
5 files changed, 277 insertions, 53 deletions
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index 18725a02d1f..95c27a625e7 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -119,7 +119,7 @@ RETURN
          WHEN 1700 /*numeric*/ THEN
               CASE WHEN $2 = -1
                    THEN null
-                   ELSE (($2 - 4) >> 16) & 65535
+                   ELSE (($2 - 4) >> 16) & 0xFFFF
                    END
          WHEN 700 /*float4*/ THEN 24 /*FLT_MANT_DIG*/
          WHEN 701 /*float8*/ THEN 53 /*DBL_MANT_DIG*/
@@ -147,7 +147,7 @@ RETURN
        WHEN $1 IN (1700) THEN
             CASE WHEN $2 = -1
                  THEN null
-                 ELSE ($2 - 4) & 65535
+                 ELSE ($2 - 4) & 0xFFFF
                  END
        ELSE null
   END;
@@ -163,7 +163,7 @@ RETURN
        WHEN $1 IN (1083, 1114, 1184, 1266) /* time, timestamp, same + tz */
            THEN CASE WHEN $2 < 0 THEN 6 ELSE $2 END
        WHEN $1 IN (1186) /* interval */
-           THEN CASE WHEN $2 < 0 OR $2 & 65535 = 65535 THEN 6 ELSE $2 & 65535 END
+           THEN CASE WHEN $2 < 0 OR $2 & 0xFFFF = 0xFFFF THEN 6 ELSE $2 & 0xFFFF END
        ELSE null
   END;
 
diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt
index 8704a42b60a..abad216b7ee 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -527,6 +527,7 @@ T652	SQL-dynamic statements in SQL routines			NO
 T653	SQL-schema statements in external routines			YES	
 T654	SQL-dynamic statements in external routines			NO	
 T655	Cyclically dependent routines			YES	
+T661	Non-decimal integer literals			YES	SQL:202x draft
 T811	Basic SQL/JSON constructor functions			NO	
 T812	SQL/JSON: JSON_OBJECTAGG			NO	
 T813	SQL/JSON: JSON_ARRAYAGG with ORDER BY			NO	
diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c
index 4014db4b80f..d33e3c179df 100644
--- a/src/backend/parser/parse_node.c
+++ b/src/backend/parser/parse_node.c
@@ -385,11 +385,46 @@ make_const(ParseState *pstate, A_Const *aconst)
 			{
 				/* could be an oversize integer as well as a float ... */
 
+				int			base = 10;
+				char	   *startptr;
+				int			sign;
+				char	   *testvalue;
 				int64		val64;
 				char	   *endptr;
 
+				startptr = aconst->val.fval.fval;
+				if (startptr[0] == '-')
+				{
+					sign = -1;
+					startptr++;
+				}
+				else
+					sign = +1;
+				if (startptr[0] == '0')	/* possible 0b/0o/0x radix prefix */
+				{
+					if (startptr[1] == 'b' || startptr[1] == 'B')
+					{
+						base = 2;
+						startptr += 2;
+					}
+					else if (startptr[1] == 'o' || startptr[1] == 'O')
+					{
+						base = 8;
+						startptr += 2;
+					}
+					else if (startptr[1] == 'x' || startptr[1] == 'X')
+					{
+						base = 16;
+						startptr += 2;
+					}
+				}
+
+				if (sign == +1)
+					testvalue = startptr;
+				else
+					testvalue = psprintf("-%s", startptr);
 				errno = 0;
-				val64 = strtoi64(aconst->val.fval.fval, &endptr, 10);
+				val64 = strtoi64(testvalue, &endptr, base);
 				if (errno == 0 && *endptr == '\0')
 				{
 					/*
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index db8b0fe8ebc..9ad9e0c8ba7 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
 static char *litbufdup(core_yyscan_t yyscanner);
 static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
-static int	process_integer_literal(const char *token, YYSTYPE *lval);
+static int	process_integer_literal(const char *token, YYSTYPE *lval, int base);
 static void addunicode(pg_wchar c, yyscan_t yyscanner);
 
 #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
@@ -385,25 +385,40 @@ operator		{op_chars}+
  * Unary minus is not part of a number here.  Instead we pass it separately to
  * the parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
  *
  * {realfail} is added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
-digit			[0-9]
-
-integer			{digit}+
-decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail		{digit}+\.\.
-real			({integer}|{decimal})[Ee][-+]?{digit}+
-realfail		({integer}|{decimal})[Ee][-+]
-
-integer_junk	{integer}{ident_start}
-decimal_junk	{decimal}{ident_start}
+decdigit		[0-9]
+hexdigit		[0-9A-Fa-f]
+octdigit		[0-7]
+bindigit		[0-1]
+
+decinteger		{decdigit}+
+hexinteger		0[xX]{hexdigit}+
+octinteger		0[oO]{octdigit}+
+bininteger		0[bB]{bindigit}+
+
+hexfail			0[xX]
+octfail			0[oO]
+binfail			0[bB]
+
+numeric			(({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail		{decdigit}+\.\.
+
+real			({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail		({decinteger}|{numeric})[Ee][-+]
+
+decinteger_junk	{decinteger}{ident_start}
+hexinteger_junk	{hexinteger}{ident_start}
+octinteger_junk	{octinteger}{ident_start}
+bininteger_junk	{bininteger}{ident_start}
+numeric_junk	{numeric}{ident_start}
 real_junk		{real}{ident_start}
 
-param			\${integer}
-param_junk		\${integer}{ident_start}
+param			\${decinteger}
+param_junk		\${decinteger}{ident_start}
 
 other			.
 
@@ -983,20 +998,44 @@ other			.
 					yyerror("trailing junk after parameter");
 				}
 
-{integer}		{
+{decinteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext, yylval, 10);
+				}
+{hexinteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext, yylval, 16);
+				}
+{octinteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext, yylval, 8);
+				}
+{bininteger}	{
+					SET_YYLLOC();
+					return process_integer_literal(yytext, yylval, 2);
+				}
+{hexfail}		{
+					SET_YYLLOC();
+					yyerror("invalid hexadecimal integer");
+				}
+{octfail}		{
 					SET_YYLLOC();
-					return process_integer_literal(yytext, yylval);
+					yyerror("invalid octal integer");
 				}
-{decimal}		{
+{binfail}		{
+					SET_YYLLOC();
+					yyerror("invalid binary integer");
+				}
+{numeric}		{
 					SET_YYLLOC();
 					yylval->str = pstrdup(yytext);
 					return FCONST;
 				}
-{decimalfail}	{
+{numericfail}	{
 					/* throw back the .., and treat as integer */
 					yyless(yyleng - 2);
 					SET_YYLLOC();
-					return process_integer_literal(yytext, yylval);
+					return process_integer_literal(yytext, yylval, 10);
 				}
 {real}			{
 					SET_YYLLOC();
@@ -1007,11 +1046,23 @@ other			.
 					SET_YYLLOC();
 					yyerror("trailing junk after numeric literal");
 				}
-{integer_junk}	{
+{decinteger_junk}	{
+					SET_YYLLOC();
+					yyerror("trailing junk after numeric literal");
+				}
+{hexinteger_junk}	{
+					SET_YYLLOC();
+					yyerror("trailing junk after numeric literal");
+				}
+{octinteger_junk}	{
+					SET_YYLLOC();
+					yyerror("trailing junk after numeric literal");
+				}
+{bininteger_junk}	{
 					SET_YYLLOC();
 					yyerror("trailing junk after numeric literal");
 				}
-{decimal_junk}	{
+{numeric_junk}	{
 					SET_YYLLOC();
 					yyerror("trailing junk after numeric literal");
 				}
@@ -1307,17 +1358,17 @@ litbufdup(core_yyscan_t yyscanner)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
- * ie digits and a decimal point.
+ * Process {decinteger}, {hexinteger}, etc.  Note this will also do the right
+ * thing with {numeric}, ie digits and a decimal point.
  */
 static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
 {
 	int			val;
 	char	   *endptr;
 
 	errno = 0;
-	val = strtoint(token, &endptr, 10);
+	val = strtoint(base == 10 ? token : token + 2, &endptr, base);
 	if (*endptr != '\0' || errno == ERANGE)
 	{
 		/* integer too large (or contains decimal pt), treat it as a float */
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index ab1564f22da..7cded73e6e6 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -85,6 +85,17 @@ decimalLength64(const uint64 v)
 	return t + (v >= PowersOfTen[t]);
 }
 
+static const int8 hexlookup[128] = {	/* digit value by char code (ASCII assumed); -1 = not a hex digit */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 0x00-0x0F */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 0x10-0x1F */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 0x20-0x2F */
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,	/* 0x30-0x3F: '0'..'9' */
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 0x40-0x4F: 'A'..'F' */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 0x50-0x5F */
+	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 0x60-0x6F: 'a'..'f' */
+	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,	/* 0x70-0x7F */
+};
+
 /*
  * Convert input string to a signed 16 bit integer.
  *
@@ -108,6 +119,7 @@ int16
 pg_strtoint16_safe(const char *s, Node *escontext)
 {
 	const char *ptr = s;
+	const char *firstdigit;
 	uint16		tmp = 0;
 	bool		neg = false;
 
@@ -124,19 +136,60 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	else if (*ptr == '+')
 		ptr++;
 
-	/* require at least one digit */
-	if (unlikely(!isdigit((unsigned char) *ptr)))
-		goto invalid_syntax;
-
 	/* process digits */
-	while (*ptr && isdigit((unsigned char) *ptr))
+	if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
 	{
-		if (unlikely(tmp > -(PG_INT16_MIN / 10)))
-			goto out_of_range;
+		firstdigit = ptr += 2;
+
+		while (*ptr && isxdigit((unsigned char) *ptr))
+		{
+			if (unlikely(tmp > -(PG_INT16_MIN / 16)))
+				goto out_of_range;
+
+			tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+	{
+		firstdigit = ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+		{
+			if (unlikely(tmp > -(PG_INT16_MIN / 8)))
+				goto out_of_range;
+
+			tmp = tmp * 8 + (*ptr++ - '0');
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+	{
+		firstdigit = ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+		{
+			if (unlikely(tmp > -(PG_INT16_MIN / 2)))
+				goto out_of_range;
+
+			tmp = tmp * 2 + (*ptr++ - '0');
+		}
+	}
+	else
+	{
+		firstdigit = ptr;
 
-		tmp = tmp * 10 + (*ptr++ - '0');
+		while (*ptr && isdigit((unsigned char) *ptr))
+		{
+			if (unlikely(tmp > -(PG_INT16_MIN / 10)))
+				goto out_of_range;
+
+			tmp = tmp * 10 + (*ptr++ - '0');
+		}
 	}
 
+	/* require at least one digit */
+	if (unlikely(ptr == firstdigit))
+		goto invalid_syntax;
+
 	/* allow trailing whitespace, but not other trailing chars */
 	while (*ptr != '\0' && isspace((unsigned char) *ptr))
 		ptr++;
@@ -193,6 +246,7 @@ int32
 pg_strtoint32_safe(const char *s, Node *escontext)
 {
 	const char *ptr = s;
+	const char *firstdigit;
 	uint32		tmp = 0;
 	bool		neg = false;
 
@@ -209,19 +263,60 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	else if (*ptr == '+')
 		ptr++;
 
-	/* require at least one digit */
-	if (unlikely(!isdigit((unsigned char) *ptr)))
-		goto invalid_syntax;
-
 	/* process digits */
-	while (*ptr && isdigit((unsigned char) *ptr))
+	if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
 	{
-		if (unlikely(tmp > -(PG_INT32_MIN / 10)))
-			goto out_of_range;
+		firstdigit = ptr += 2;
+
+		while (*ptr && isxdigit((unsigned char) *ptr))
+		{
+			if (unlikely(tmp > -(PG_INT32_MIN / 16)))
+				goto out_of_range;
+
+			tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+	{
+		firstdigit = ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+		{
+			if (unlikely(tmp > -(PG_INT32_MIN / 8)))
+				goto out_of_range;
+
+			tmp = tmp * 8 + (*ptr++ - '0');
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+	{
+		firstdigit = ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+		{
+			if (unlikely(tmp > -(PG_INT32_MIN / 2)))
+				goto out_of_range;
+
+			tmp = tmp * 2 + (*ptr++ - '0');
+		}
+	}
+	else
+	{
+		firstdigit = ptr;
+
+		while (*ptr && isdigit((unsigned char) *ptr))
+		{
+			if (unlikely(tmp > -(PG_INT32_MIN / 10)))
+				goto out_of_range;
 
-		tmp = tmp * 10 + (*ptr++ - '0');
+			tmp = tmp * 10 + (*ptr++ - '0');
+		}
 	}
 
+	/* require at least one digit */
+	if (unlikely(ptr == firstdigit))
+		goto invalid_syntax;
+
 	/* allow trailing whitespace, but not other trailing chars */
 	while (*ptr != '\0' && isspace((unsigned char) *ptr))
 		ptr++;
@@ -278,6 +373,7 @@ int64
 pg_strtoint64_safe(const char *s, Node *escontext)
 {
 	const char *ptr = s;
+	const char *firstdigit;
 	uint64		tmp = 0;
 	bool		neg = false;
 
@@ -294,18 +390,59 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	else if (*ptr == '+')
 		ptr++;
 
-	/* require at least one digit */
-	if (unlikely(!isdigit((unsigned char) *ptr)))
-		goto invalid_syntax;
-
 	/* process digits */
-	while (*ptr && isdigit((unsigned char) *ptr))
+	if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
 	{
-		if (unlikely(tmp > -(PG_INT64_MIN / 10)))
-			goto out_of_range;
+		firstdigit = ptr += 2;
+
+		while (*ptr && isxdigit((unsigned char) *ptr))
+		{
+			if (unlikely(tmp > -(PG_INT64_MIN / 16)))
+				goto out_of_range;
 
-		tmp = tmp * 10 + (*ptr++ - '0');
+			tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++];
+		}
 	}
+	else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+	{
+		firstdigit = ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+		{
+			if (unlikely(tmp > -(PG_INT64_MIN / 8)))
+				goto out_of_range;
+
+			tmp = tmp * 8 + (*ptr++ - '0');
+		}
+	}
+	else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+	{
+		firstdigit = ptr += 2;
+
+		while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+		{
+			if (unlikely(tmp > -(PG_INT64_MIN / 2)))
+				goto out_of_range;
+
+			tmp = tmp * 2 + (*ptr++ - '0');
+		}
+	}
+	else
+	{
+		firstdigit = ptr;
+
+		while (*ptr && isdigit((unsigned char) *ptr))
+		{
+			if (unlikely(tmp > -(PG_INT64_MIN / 10)))
+				goto out_of_range;
+
+			tmp = tmp * 10 + (*ptr++ - '0');
+		}
+	}
+
+	/* require at least one digit */
+	if (unlikely(ptr == firstdigit))
+		goto invalid_syntax;
 
 	/* allow trailing whitespace, but not other trailing chars */
 	while (*ptr != '\0' && isspace((unsigned char) *ptr))
author	Peter Eisentraut	2022-12-14 04:40:38 +0000
committer	Peter Eisentraut	2022-12-14 05:17:07 +0000
commit	6fcda9aba83449082124825b6d375c0a61e21c42 (patch)
tree	d2e23f5322bf6879e0ee328593fbc7b3f6f71702 /src/backend
parent	60684dd834a222fefedd49b19d1f0a6189c1632e (diff)