diff options
-rw-r--r-- | doc/src/sgml/func.sgml | 28 | ||||
-rw-r--r-- | src/backend/utils/adt/formatting.c | 134 | ||||
-rw-r--r-- | src/test/regress/expected/numeric.out | 56 | ||||
-rw-r--r-- | src/test/regress/sql/numeric.sql | 11 |
4 files changed, 197 insertions, 32 deletions
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index f901567f7e2..35a845c4001 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -5850,7 +5850,10 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); data based on the given value. Any text that is not a template pattern is simply copied verbatim. Similarly, in an input template string (for the other functions), template patterns identify the values to be supplied by - the input data string. + the input data string. If there are characters in the template string + that are not template patterns, the corresponding characters in the input + data string are simply skipped over (whether or not they are equal to the + template string characters). </para> <para> @@ -6176,13 +6179,15 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); Ordinary text is allowed in <function>to_char</function> templates and will be output literally. You can put a substring in double quotes to force it to be interpreted as literal text - even if it contains pattern key words. For example, in + even if it contains template patterns. For example, in <literal>'"Hello Year "YYYY'</literal>, the <literal>YYYY</literal> will be replaced by the year data, but the single <literal>Y</literal> in <literal>Year</literal> - will not be. In <function>to_date</function>, <function>to_number</function>, - and <function>to_timestamp</function>, double-quoted strings skip the number of - input characters contained in the string, e.g. <literal>"XX"</literal> - skips two input characters. + will not be. + In <function>to_date</function>, <function>to_number</function>, + and <function>to_timestamp</function>, literal text and double-quoted + strings result in skipping the number of characters contained in the + string; for example <literal>"XX"</literal> skips two input characters + (whether or not they are <literal>XX</literal>). </para> </listitem> @@ -6485,6 +6490,17 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); <listitem> <para> + In <function>to_number</function>, if non-data template patterns such + as <literal>L</literal> or <literal>TH</literal> are used, the + corresponding number of input characters are skipped, whether or not + they match the template pattern, unless they are data characters + (that is, digits, sign, decimal point, or comma). For + example, <literal>TH</literal> would skip two non-data characters. + </para> + </listitem> + + <listitem> + <para> <literal>V</literal> with <function>to_char</function> multiplies the input values by <literal>10^<replaceable>n</replaceable></literal>, where diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 50254f23888..5afc293a5a0 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -988,7 +988,7 @@ static char *get_last_relevant_decnum(char *num); static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len); static void NUM_numpart_to_char(NUMProc *Np, int id); static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, - char *number, int from_char_input_len, int to_char_out_pre_spaces, + char *number, int input_len, int to_char_out_pre_spaces, int sign, bool is_to_char, Oid collid); static DCHCacheEntry *DCH_cache_getnew(const char *str); static DCHCacheEntry *DCH_cache_search(const char *str); @@ -4232,6 +4232,14 @@ get_last_relevant_decnum(char *num) return result; } +/* + * These macros are used in NUM_processor() and its subsidiary routines. + * OVERLOAD_TEST: true if we've reached end of input string + * AMOUNT_TEST(s): true if at least s characters remain in string + */ +#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len) +#define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s))) + /* ---------- * Number extraction for TO_NUMBER() * ---------- @@ -4246,9 +4254,6 @@ NUM_numpart_from_char(NUMProc *Np, int id, int input_len) (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???"); #endif -#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len) -#define AMOUNT_TEST(_s) (input_len-(Np->inout_p-Np->inout) >= _s) - if (OVERLOAD_TEST) return; @@ -4641,14 +4646,32 @@ NUM_numpart_to_char(NUMProc *Np, int id) ++Np->num_curr; } +/* + * Skip over "n" input characters, but only if they aren't numeric data + */ +static void +NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len) +{ + while (n-- > 0) + { + if (OVERLOAD_TEST) + break; /* end of input */ + if (strchr("0123456789.,+-", *Np->inout_p) != NULL) + break; /* it's a data character */ + Np->inout_p += pg_mblen(Np->inout_p); + } +} + static char * NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, - char *number, int from_char_input_len, int to_char_out_pre_spaces, + char *number, int input_len, int to_char_out_pre_spaces, int sign, bool is_to_char, Oid collid) { FormatNode *n; NUMProc _Np, *Np = &_Np; + const char *pattern; + int pattern_len; MemSet(Np, 0, sizeof(NUMProc)); @@ -4816,9 +4839,11 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, if (!Np->is_to_char) { /* - * Check non-string inout end + * Check at least one character remains to be scanned. (In + * actions below, must use AMOUNT_TEST if we want to read more + * characters than that.) */ - if (Np->inout_p >= Np->inout + from_char_input_len) + if (OVERLOAD_TEST) break; } @@ -4828,12 +4853,16 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, if (n->type == NODE_TYPE_ACTION) { /* - * Create/reading digit/zero/blank/sing + * Create/read digit/zero/blank/sign/special-case * * 'NUM_S' note: The locale sign is anchored to number and we * read/write it when we work with first or last number - * (NUM_0/NUM_9). This is reason why NUM_S missing in follow - * switch(). + * (NUM_0/NUM_9). This is why NUM_S is missing in switch(). + * + * Notice the "Np->inout_p++" at the bottom of the loop. This is + * why most of the actions advance inout_p one less than you might + * expect. In cases where we don't want that increment to happen, + * a switch case ends with "continue" not "break". */ switch (n->key->id) { @@ -4848,7 +4877,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, } else { - NUM_numpart_from_char(Np, n->key->id, from_char_input_len); + NUM_numpart_from_char(Np, n->key->id, input_len); break; /* switch() case: */ } @@ -4872,10 +4901,14 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, if (IS_FILLMODE(Np->Num)) continue; } + if (*Np->inout_p != ',') + continue; } break; case NUM_G: + pattern = Np->L_thousands_sep; + pattern_len = strlen(pattern); if (Np->is_to_char) { if (!Np->num_in) @@ -4884,16 +4917,16 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, continue; else { - int x = strlen(Np->L_thousands_sep); - - memset(Np->inout_p, ' ', x); - Np->inout_p += x - 1; + /* just in case there are MB chars */ + pattern_len = pg_mbstrlen(pattern); + memset(Np->inout_p, ' ', pattern_len); + Np->inout_p += pattern_len - 1; } } else { - strcpy(Np->inout_p, Np->L_thousands_sep); - Np->inout_p += strlen(Np->inout_p) - 1; + strcpy(Np->inout_p, pattern); + Np->inout_p += pattern_len - 1; } } else @@ -4903,18 +4936,33 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, if (IS_FILLMODE(Np->Num)) continue; } - Np->inout_p += strlen(Np->L_thousands_sep) - 1; + + /* + * Because L_thousands_sep typically contains data + * characters (either '.' or ','), we can't use + * NUM_eat_non_data_chars here. Instead skip only if + * the input matches L_thousands_sep. + */ + if (AMOUNT_TEST(pattern_len) && + strncmp(Np->inout_p, pattern, pattern_len) == 0) + Np->inout_p += pattern_len - 1; + else + continue; } break; case NUM_L: + pattern = Np->L_currency_symbol; if (Np->is_to_char) { - strcpy(Np->inout_p, Np->L_currency_symbol); - Np->inout_p += strlen(Np->inout_p) - 1; + strcpy(Np->inout_p, pattern); + Np->inout_p += strlen(pattern) - 1; } else - Np->inout_p += strlen(Np->L_currency_symbol) - 1; + { + NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len); + continue; + } break; case NUM_RN: @@ -4949,8 +4997,16 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, continue; if (Np->is_to_char) + { strcpy(Np->inout_p, get_th(Np->number, TH_LOWER)); - Np->inout_p += 1; + Np->inout_p += 1; + } + else + { + /* All variants of 'th' occupy 2 characters */ + NUM_eat_non_data_chars(Np, 2, input_len); + continue; + } break; case NUM_TH: @@ -4959,8 +5015,16 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, continue; if (Np->is_to_char) + { strcpy(Np->inout_p, get_th(Np->number, TH_UPPER)); - Np->inout_p += 1; + Np->inout_p += 1; + } + else + { + /* All variants of 'TH' occupy 2 characters */ + NUM_eat_non_data_chars(Np, 2, input_len); + continue; + } break; case NUM_MI: @@ -4977,6 +5041,11 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, { if (*Np->inout_p == '-') *Np->number = '-'; + else + { + NUM_eat_non_data_chars(Np, 1, input_len); + continue; + } } break; @@ -4994,23 +5063,31 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, { if (*Np->inout_p == '+') *Np->number = '+'; + else + { + NUM_eat_non_data_chars(Np, 1, input_len); + continue; + } } break; case NUM_SG: if (Np->is_to_char) *Np->inout_p = Np->sign; - else { if (*Np->inout_p == '-') *Np->number = '-'; else if (*Np->inout_p == '+') *Np->number = '+'; + else + { + NUM_eat_non_data_chars(Np, 1, input_len); + continue; + } } break; - default: continue; break; @@ -5019,7 +5096,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, else { /* - * Remove to output char from input in TO_CHAR + * In TO_CHAR, non-pattern characters in the format are copied to + * the output. In TO_NUMBER, we skip one input character for each + * non-pattern format character, whether or not it matches the + * format character. (Currently, that's actually implemented as + * skipping one input byte per non-pattern format byte, which is + * wrong...) */ if (Np->is_to_char) *Np->inout_p = n->character; diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out index 7e55b0e2931..a96bfc0eb04 100644 --- a/src/test/regress/expected/numeric.out +++ b/src/test/regress/expected/numeric.out @@ -1219,6 +1219,7 @@ SELECT '' AS to_char_26, to_char('100'::numeric, 'FM999'); -- TO_NUMBER() -- +SET lc_numeric = 'C'; SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999'); to_number_1 | to_number -------------+----------- @@ -1297,6 +1298,61 @@ SELECT '' AS to_number_13, to_number(' . 0 1-', ' 9 9 . 9 9 S'); | -0.01 (1 row) +SELECT '' AS to_number_14, to_number('34,50','999,99'); + to_number_14 | to_number +--------------+----------- + | 3450 +(1 row) + +SELECT '' AS to_number_15, to_number('123,000','999G'); + to_number_15 | to_number +--------------+----------- + | 123 +(1 row) + +SELECT '' AS to_number_16, to_number('123456','999G999'); + to_number_16 | to_number +--------------+----------- + | 123456 +(1 row) + +SELECT '' AS to_number_17, to_number('$1234.56','L9,999.99'); + to_number_17 | to_number +--------------+----------- + | 1234.56 +(1 row) + +SELECT '' AS to_number_18, to_number('$1234.56','L99,999.99'); + to_number_18 | to_number +--------------+----------- + | 1234.56 +(1 row) + +SELECT '' AS to_number_19, to_number('$1,234.56','L99,999.99'); + to_number_19 | to_number +--------------+----------- + | 1234.56 +(1 row) + +SELECT '' AS to_number_20, to_number('1234.56','L99,999.99'); + to_number_20 | to_number +--------------+----------- + | 1234.56 +(1 row) + +SELECT '' AS to_number_21, to_number('1,234.56','L99,999.99'); + to_number_21 | to_number +--------------+----------- + | 1234.56 +(1 row) + +SELECT '' AS to_number_22, to_number('42nd', '99th'); + to_number_22 | to_number +--------------+----------- + | 42 +(1 row) + +RESET lc_numeric; -- -- Input syntax -- diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql index 9675b6eabf3..321c7bdf7c5 100644 --- a/src/test/regress/sql/numeric.sql +++ b/src/test/regress/sql/numeric.sql @@ -788,6 +788,7 @@ SELECT '' AS to_char_26, to_char('100'::numeric, 'FM999'); -- TO_NUMBER() -- +SET lc_numeric = 'C'; SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999'); SELECT '' AS to_number_2, to_number('-34,338,492.654,878', '99G999G999D999G999'); SELECT '' AS to_number_3, to_number('<564646.654564>', '999999.999999PR'); @@ -801,6 +802,16 @@ SELECT '' AS to_number_10, to_number('0', '99.99'); SELECT '' AS to_number_11, to_number('.-01', 'S99.99'); SELECT '' AS to_number_12, to_number('.01-', '99.99S'); SELECT '' AS to_number_13, to_number(' . 0 1-', ' 9 9 . 9 9 S'); +SELECT '' AS to_number_14, to_number('34,50','999,99'); +SELECT '' AS to_number_15, to_number('123,000','999G'); +SELECT '' AS to_number_16, to_number('123456','999G999'); +SELECT '' AS to_number_17, to_number('$1234.56','L9,999.99'); +SELECT '' AS to_number_18, to_number('$1234.56','L99,999.99'); +SELECT '' AS to_number_19, to_number('$1,234.56','L99,999.99'); +SELECT '' AS to_number_20, to_number('1234.56','L99,999.99'); +SELECT '' AS to_number_21, to_number('1,234.56','L99,999.99'); +SELECT '' AS to_number_22, to_number('42nd', '99th'); +RESET lc_numeric; -- -- Input syntax |