diff options
| author | Peter Eisentraut | 2022-12-14 04:40:38 +0000 |
|---|---|---|
| committer | Peter Eisentraut | 2022-12-14 05:17:07 +0000 |
| commit | 6fcda9aba83449082124825b6d375c0a61e21c42 (patch) | |
| tree | d2e23f5322bf6879e0ee328593fbc7b3f6f71702 /src/backend | |
| parent | 60684dd834a222fefedd49b19d1f0a6189c1632e (diff) | |
Non-decimal integer literals
Add support for hexadecimal, octal, and binary integer literals:
0x42F
0o273
0b100101
per SQL:202x draft.
This adds support in the lexer as well as in the integer type input
functions.
Reviewed-by: John Naylor <john.naylor@enterprisedb.com>
Reviewed-by: Zhihong Yu <zyu@yugabyte.com>
Reviewed-by: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Dean Rasheed <dean.a.rasheed@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb97d@enterprisedb.com
Diffstat (limited to 'src/backend')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/backend/catalog/information_schema.sql | 6 |
| -rw-r--r-- | src/backend/catalog/sql_features.txt | 1 |
| -rw-r--r-- | src/backend/parser/parse_node.c | 37 |
| -rw-r--r-- | src/backend/parser/scan.l | 101 |
| -rw-r--r-- | src/backend/utils/adt/numutils.c | 185 |
5 files changed, 277 insertions, 53 deletions
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql index 18725a02d1f..95c27a625e7 100644 --- a/src/backend/catalog/information_schema.sql +++ b/src/backend/catalog/information_schema.sql @@ -119,7 +119,7 @@ RETURN WHEN 1700 /*numeric*/ THEN CASE WHEN $2 = -1 THEN null - ELSE (($2 - 4) >> 16) & 65535 + ELSE (($2 - 4) >> 16) & 0xFFFF END WHEN 700 /*float4*/ THEN 24 /*FLT_MANT_DIG*/ WHEN 701 /*float8*/ THEN 53 /*DBL_MANT_DIG*/ @@ -147,7 +147,7 @@ RETURN WHEN $1 IN (1700) THEN CASE WHEN $2 = -1 THEN null - ELSE ($2 - 4) & 65535 + ELSE ($2 - 4) & 0xFFFF END ELSE null END; @@ -163,7 +163,7 @@ RETURN WHEN $1 IN (1083, 1114, 1184, 1266) /* time, timestamp, same + tz */ THEN CASE WHEN $2 < 0 THEN 6 ELSE $2 END WHEN $1 IN (1186) /* interval */ - THEN CASE WHEN $2 < 0 OR $2 & 65535 = 65535 THEN 6 ELSE $2 & 65535 END + THEN CASE WHEN $2 < 0 OR $2 & 0xFFFF = 0xFFFF THEN 6 ELSE $2 & 0xFFFF END ELSE null END; diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt index 8704a42b60a..abad216b7ee 100644 --- a/src/backend/catalog/sql_features.txt +++ b/src/backend/catalog/sql_features.txt @@ -527,6 +527,7 @@ T652 SQL-dynamic statements in SQL routines NO T653 SQL-schema statements in external routines YES T654 SQL-dynamic statements in external routines NO T655 Cyclically dependent routines YES +T661 Non-decimal integer literals YES SQL:202x draft T811 Basic SQL/JSON constructor functions NO T812 SQL/JSON: JSON_OBJECTAGG NO T813 SQL/JSON: JSON_ARRAYAGG with ORDER BY NO diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c index 4014db4b80f..d33e3c179df 100644 --- a/src/backend/parser/parse_node.c +++ b/src/backend/parser/parse_node.c @@ -385,11 +385,46 @@ make_const(ParseState *pstate, A_Const *aconst) { /* could be an oversize integer as well as a float ... 
*/ + int base = 10; + char *startptr; + int sign; + char *testvalue; int64 val64; char *endptr; + startptr = aconst->val.fval.fval; + if (startptr[0] == '-') + { + sign = -1; + startptr++; + } + else + sign = +1; + if (startptr[0] == '0') + { + if (startptr[1] == 'b' || startptr[1] == 'B') + { + base = 2; + startptr += 2; + } + else if (startptr[1] == 'o' || startptr[1] == 'O') + { + base = 8; + startptr += 2; + } + if (startptr[1] == 'x' || startptr[1] == 'X') + { + base = 16; + startptr += 2; + } + } + + if (sign == +1) + testvalue = startptr; + else + testvalue = psprintf("-%s", startptr); errno = 0; - val64 = strtoi64(aconst->val.fval.fval, &endptr, 10); + val64 = strtoi64(testvalue, &endptr, base); if (errno == 0 && *endptr == '\0') { /* diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index db8b0fe8ebc..9ad9e0c8ba7 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner); static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner); static char *litbufdup(core_yyscan_t yyscanner); static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner); -static int process_integer_literal(const char *token, YYSTYPE *lval); +static int process_integer_literal(const char *token, YYSTYPE *lval, int base); static void addunicode(pg_wchar c, yyscan_t yyscanner); #define yyerror(msg) scanner_yyerror(msg, yyscanner) @@ -385,25 +385,40 @@ operator {op_chars}+ * Unary minus is not part of a number here. Instead we pass it separately to * the parser, and there it gets coerced via doNegate(). * - * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. + * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. * * {realfail} is added to prevent the need for scanner * backup when the {real} rule fails to match completely. 
*/ -digit [0-9] - -integer {digit}+ -decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) -decimalfail {digit}+\.\. -real ({integer}|{decimal})[Ee][-+]?{digit}+ -realfail ({integer}|{decimal})[Ee][-+] - -integer_junk {integer}{ident_start} -decimal_junk {decimal}{ident_start} +decdigit [0-9] +hexdigit [0-9A-Fa-f] +octdigit [0-7] +bindigit [0-1] + +decinteger {decdigit}+ +hexinteger 0[xX]{hexdigit}+ +octinteger 0[oO]{octdigit}+ +bininteger 0[bB]{bindigit}+ + +hexfail 0[xX] +octfail 0[oO] +binfail 0[bB] + +numeric (({decinteger}\.{decinteger}?)|(\.{decinteger})) +numericfail {decdigit}+\.\. + +real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+ +realfail ({decinteger}|{numeric})[Ee][-+] + +decinteger_junk {decinteger}{ident_start} +hexinteger_junk {hexinteger}{ident_start} +octinteger_junk {octinteger}{ident_start} +bininteger_junk {bininteger}{ident_start} +numeric_junk {numeric}{ident_start} real_junk {real}{ident_start} -param \${integer} -param_junk \${integer}{ident_start} +param \${decinteger} +param_junk \${decinteger}{ident_start} other . @@ -983,20 +998,44 @@ other . 
yyerror("trailing junk after parameter"); } -{integer} { +{decinteger} { + SET_YYLLOC(); + return process_integer_literal(yytext, yylval, 10); + } +{hexinteger} { + SET_YYLLOC(); + return process_integer_literal(yytext, yylval, 16); + } +{octinteger} { + SET_YYLLOC(); + return process_integer_literal(yytext, yylval, 8); + } +{bininteger} { + SET_YYLLOC(); + return process_integer_literal(yytext, yylval, 2); + } +{hexfail} { + SET_YYLLOC(); + yyerror("invalid hexadecimal integer"); + } +{octfail} { SET_YYLLOC(); - return process_integer_literal(yytext, yylval); + yyerror("invalid octal integer"); } -{decimal} { +{binfail} { + SET_YYLLOC(); + yyerror("invalid binary integer"); + } +{numeric} { SET_YYLLOC(); yylval->str = pstrdup(yytext); return FCONST; } -{decimalfail} { +{numericfail} { /* throw back the .., and treat as integer */ yyless(yyleng - 2); SET_YYLLOC(); - return process_integer_literal(yytext, yylval); + return process_integer_literal(yytext, yylval, 10); } {real} { SET_YYLLOC(); @@ -1007,11 +1046,23 @@ other . SET_YYLLOC(); yyerror("trailing junk after numeric literal"); } -{integer_junk} { +{decinteger_junk} { + SET_YYLLOC(); + yyerror("trailing junk after numeric literal"); + } +{hexinteger_junk} { + SET_YYLLOC(); + yyerror("trailing junk after numeric literal"); + } +{octinteger_junk} { + SET_YYLLOC(); + yyerror("trailing junk after numeric literal"); + } +{bininteger_junk} { SET_YYLLOC(); yyerror("trailing junk after numeric literal"); } -{decimal_junk} { +{numeric_junk} { SET_YYLLOC(); yyerror("trailing junk after numeric literal"); } @@ -1307,17 +1358,17 @@ litbufdup(core_yyscan_t yyscanner) } /* - * Process {integer}. Note this will also do the right thing with {decimal}, - * ie digits and a decimal point. + * Process {decinteger}, {hexinteger}, etc. Note this will also do the right + * thing with {numeric}, ie digits and a decimal point. 
*/ static int -process_integer_literal(const char *token, YYSTYPE *lval) +process_integer_literal(const char *token, YYSTYPE *lval, int base) { int val; char *endptr; errno = 0; - val = strtoint(token, &endptr, 10); + val = strtoint(base == 10 ? token : token + 2, &endptr, base); if (*endptr != '\0' || errno == ERANGE) { /* integer too large (or contains decimal pt), treat it as a float */ diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index ab1564f22da..7cded73e6e6 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -85,6 +85,17 @@ decimalLength64(const uint64 v) return t + (v >= PowersOfTen[t]); } +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + /* * Convert input string to a signed 16 bit integer. 
* @@ -108,6 +119,7 @@ int16 pg_strtoint16_safe(const char *s, Node *escontext) { const char *ptr = s; + const char *firstdigit; uint16 tmp = 0; bool neg = false; @@ -124,19 +136,60 @@ pg_strtoint16_safe(const char *s, Node *escontext) else if (*ptr == '+') ptr++; - /* require at least one digit */ - if (unlikely(!isdigit((unsigned char) *ptr))) - goto invalid_syntax; - /* process digits */ - while (*ptr && isdigit((unsigned char) *ptr)) + if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) { - if (unlikely(tmp > -(PG_INT16_MIN / 10))) - goto out_of_range; + firstdigit = ptr += 2; + + while (*ptr && isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT16_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) + { + firstdigit = ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '7')) + { + if (unlikely(tmp > -(PG_INT16_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + } + else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) + { + firstdigit = ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '1')) + { + if (unlikely(tmp > -(PG_INT16_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + } + else + { + firstdigit = ptr; - tmp = tmp * 10 + (*ptr++ - '0'); + while (*ptr && isdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT16_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } } + /* require at least one digit */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* allow trailing whitespace, but not other trailing chars */ while (*ptr != '\0' && isspace((unsigned char) *ptr)) ptr++; @@ -193,6 +246,7 @@ int32 pg_strtoint32_safe(const char *s, Node *escontext) { const char *ptr = s; + const char *firstdigit; uint32 tmp = 0; bool neg = false; @@ -209,19 +263,60 @@ pg_strtoint32_safe(const char *s, Node *escontext) else if (*ptr == '+') ptr++; - /* require at 
least one digit */ - if (unlikely(!isdigit((unsigned char) *ptr))) - goto invalid_syntax; - /* process digits */ - while (*ptr && isdigit((unsigned char) *ptr)) + if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) { - if (unlikely(tmp > -(PG_INT32_MIN / 10))) - goto out_of_range; + firstdigit = ptr += 2; + + while (*ptr && isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT32_MIN / 16))) + goto out_of_range; + + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } + } + else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) + { + firstdigit = ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '7')) + { + if (unlikely(tmp > -(PG_INT32_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + } + else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) + { + firstdigit = ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '1')) + { + if (unlikely(tmp > -(PG_INT32_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + } + else + { + firstdigit = ptr; + + while (*ptr && isdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT32_MIN / 10))) + goto out_of_range; - tmp = tmp * 10 + (*ptr++ - '0'); + tmp = tmp * 10 + (*ptr++ - '0'); + } } + /* require at least one digit */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; + /* allow trailing whitespace, but not other trailing chars */ while (*ptr != '\0' && isspace((unsigned char) *ptr)) ptr++; @@ -278,6 +373,7 @@ int64 pg_strtoint64_safe(const char *s, Node *escontext) { const char *ptr = s; + const char *firstdigit; uint64 tmp = 0; bool neg = false; @@ -294,18 +390,59 @@ pg_strtoint64_safe(const char *s, Node *escontext) else if (*ptr == '+') ptr++; - /* require at least one digit */ - if (unlikely(!isdigit((unsigned char) *ptr))) - goto invalid_syntax; - /* process digits */ - while (*ptr && isdigit((unsigned char) *ptr)) + if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) { - if (unlikely(tmp > -(PG_INT64_MIN / 10))) - goto 
out_of_range; + firstdigit = ptr += 2; + + while (*ptr && isxdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT64_MIN / 16))) + goto out_of_range; - tmp = tmp * 10 + (*ptr++ - '0'); + tmp = tmp * 16 + hexlookup[(unsigned char) *ptr++]; + } } + else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O')) + { + firstdigit = ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '7')) + { + if (unlikely(tmp > -(PG_INT64_MIN / 8))) + goto out_of_range; + + tmp = tmp * 8 + (*ptr++ - '0'); + } + } + else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B')) + { + firstdigit = ptr += 2; + + while (*ptr && (*ptr >= '0' && *ptr <= '1')) + { + if (unlikely(tmp > -(PG_INT64_MIN / 2))) + goto out_of_range; + + tmp = tmp * 2 + (*ptr++ - '0'); + } + } + else + { + firstdigit = ptr; + + while (*ptr && isdigit((unsigned char) *ptr)) + { + if (unlikely(tmp > -(PG_INT64_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + (*ptr++ - '0'); + } + } + + /* require at least one digit */ + if (unlikely(ptr == firstdigit)) + goto invalid_syntax; /* allow trailing whitespace, but not other trailing chars */ while (*ptr != '\0' && isspace((unsigned char) *ptr)) |
