Tighten error checks in datetime input, and remove bogus "ISO" format.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 16 Mar 2023 18:18:28 +0000 (14:18 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 16 Mar 2023 18:18:33 +0000 (14:18 -0400)
DecodeDateTime and DecodeTimeOnly had support for date input in the
style "Y2023M03D16", which the comments claimed to be an "ISO" format.
However, so far as I can find there is no such format in ISO 8601;
they write units before numbers in intervals, but not in datetimes.
Furthermore, the lesser-known ISO 8601-2 spec actually defines an
incompatible format "2023Y03M16D".  None of our documentation mentions
such a format either.  So let's just drop it.

That leaves us with only two cases for a prefix unit specifier in
datetimes: Julian dates written as Jnnnn, and the "T" separator
defined by ISO 8601.  Add checks to catch misuse of these specifiers,
that is consecutive specifiers or a dangling specifier at the end of
the string.  We do not however disallow a specifier that is separated
from the field that it disambiguates (by noise words or unrelated
fields).  That being the case, remove some overly-aggressive error
checks from the ISOTIME cases.

Joseph Koshakow, editorialized a bit by me; thanks also to
Peter Eisentraut for some standards-reading.

Discussion: https://postgr.es/m/CAAvxfHf2Q1gKLiHGnuPOiyf0ASvKUM4BnMfsXuwgtYEb_Gx0Zw@mail.gmail.com

src/backend/utils/adt/datetime.c
src/test/regress/expected/horology.out
src/test/regress/sql/horology.sql

index a7558d39a0e41eec4b3c8dcbde2b70407c136072..516ee9c154b25de74125800df7f935eab638b8dc 100644 (file)
@@ -983,7 +983,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
    int         fmask = 0,
                tmask,
                type;
-   int         ptype = 0;      /* "prefix type" for ISO y2001m02d04 format */
+   int         ptype = 0;      /* "prefix type" for ISO and Julian formats */
    int         i;
    int         val;
    int         dterr;
@@ -1071,6 +1071,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
                    {
                        char       *cp;
 
+                       /*
+                        * Allow a preceding "t" field, but no other units.
+                        */
                        if (ptype != 0)
                        {
                            /* Sanity check; should not fail this test */
@@ -1175,8 +1178,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
            case DTK_NUMBER:
 
                /*
-                * Was this an "ISO date" with embedded field labels? An
-                * example is "y2001m02d04" - thomas 2001-02-04
+                * Deal with cases where previous field labeled this one
                 */
                if (ptype != 0)
                {
@@ -1187,85 +1189,11 @@ DecodeDateTime(char **field, int *ftype, int nf,
                    value = strtoint(field[i], &cp, 10);
                    if (errno == ERANGE)
                        return DTERR_FIELD_OVERFLOW;
-
-                   /*
-                    * only a few kinds are allowed to have an embedded
-                    * decimal
-                    */
-                   if (*cp == '.')
-                       switch (ptype)
-                       {
-                           case DTK_JULIAN:
-                           case DTK_TIME:
-                           case DTK_SECOND:
-                               break;
-                           default:
-                               return DTERR_BAD_FORMAT;
-                               break;
-                       }
-                   else if (*cp != '\0')
+                   if (*cp != '.' && *cp != '\0')
                        return DTERR_BAD_FORMAT;
 
                    switch (ptype)
                    {
-                       case DTK_YEAR:
-                           tm->tm_year = value;
-                           tmask = DTK_M(YEAR);
-                           break;
-
-                       case DTK_MONTH:
-
-                           /*
-                            * already have a month and hour? then assume
-                            * minutes
-                            */
-                           if ((fmask & DTK_M(MONTH)) != 0 &&
-                               (fmask & DTK_M(HOUR)) != 0)
-                           {
-                               tm->tm_min = value;
-                               tmask = DTK_M(MINUTE);
-                           }
-                           else
-                           {
-                               tm->tm_mon = value;
-                               tmask = DTK_M(MONTH);
-                           }
-                           break;
-
-                       case DTK_DAY:
-                           tm->tm_mday = value;
-                           tmask = DTK_M(DAY);
-                           break;
-
-                       case DTK_HOUR:
-                           tm->tm_hour = value;
-                           tmask = DTK_M(HOUR);
-                           break;
-
-                       case DTK_MINUTE:
-                           tm->tm_min = value;
-                           tmask = DTK_M(MINUTE);
-                           break;
-
-                       case DTK_SECOND:
-                           tm->tm_sec = value;
-                           tmask = DTK_M(SECOND);
-                           if (*cp == '.')
-                           {
-                               dterr = ParseFractionalSecond(cp, fsec);
-                               if (dterr)
-                                   return dterr;
-                               tmask = DTK_ALL_SECS_M;
-                           }
-                           break;
-
-                       case DTK_TZ:
-                           tmask = DTK_M(TZ);
-                           dterr = DecodeTimezone(field[i], tzp);
-                           if (dterr)
-                               return dterr;
-                           break;
-
                        case DTK_JULIAN:
                            /* previous field was a label for "julian date" */
                            if (value < 0)
@@ -1519,6 +1447,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
 
                    case UNITS:
                        tmask = 0;
+                       /* reject consecutive unhandled units */
+                       if (ptype != 0)
+                           return DTERR_BAD_FORMAT;
                        ptype = val;
                        break;
 
@@ -1534,18 +1465,9 @@ DecodeDateTime(char **field, int *ftype, int nf,
                        if ((fmask & DTK_DATE_M) != DTK_DATE_M)
                            return DTERR_BAD_FORMAT;
 
-                       /***
-                        * We will need one of the following fields:
-                        *  DTK_NUMBER should be hhmmss.fff
-                        *  DTK_TIME should be hh:mm:ss.fff
-                        *  DTK_DATE should be hhmmss-zz
-                        ***/
-                       if (i >= nf - 1 ||
-                           (ftype[i + 1] != DTK_NUMBER &&
-                            ftype[i + 1] != DTK_TIME &&
-                            ftype[i + 1] != DTK_DATE))
+                       /* reject consecutive unhandled units */
+                       if (ptype != 0)
                            return DTERR_BAD_FORMAT;
-
                        ptype = val;
                        break;
 
@@ -1576,6 +1498,10 @@ DecodeDateTime(char **field, int *ftype, int nf,
        fmask |= tmask;
    }                           /* end loop over fields */
 
+   /* reject if prefix type appeared and was never handled */
+   if (ptype != 0)
+       return DTERR_BAD_FORMAT;
+
    /* do additional checking for normal date specs (but not "infinity" etc) */
    if (*dtype == DTK_DATE)
    {
@@ -1943,7 +1869,7 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
    int         fmask = 0,
                tmask,
                type;
-   int         ptype = 0;      /* "prefix type" for ISO h04mm05s06 format */
+   int         ptype = 0;      /* "prefix type" for ISO and Julian formats */
    int         i;
    int         val;
    int         dterr;
@@ -2070,112 +1996,26 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
            case DTK_NUMBER:
 
                /*
-                * Was this an "ISO time" with embedded field labels? An
-                * example is "h04mm05s06" - thomas 2001-02-04
+                * Deal with cases where previous field labeled this one
                 */
                if (ptype != 0)
                {
                    char       *cp;
                    int         value;
 
-                   /* Only accept a date under limited circumstances */
-                   switch (ptype)
-                   {
-                       case DTK_JULIAN:
-                       case DTK_YEAR:
-                       case DTK_MONTH:
-                       case DTK_DAY:
-                           if (tzp == NULL)
-                               return DTERR_BAD_FORMAT;
-                       default:
-                           break;
-                   }
-
                    errno = 0;
                    value = strtoint(field[i], &cp, 10);
                    if (errno == ERANGE)
                        return DTERR_FIELD_OVERFLOW;
-
-                   /*
-                    * only a few kinds are allowed to have an embedded
-                    * decimal
-                    */
-                   if (*cp == '.')
-                       switch (ptype)
-                       {
-                           case DTK_JULIAN:
-                           case DTK_TIME:
-                           case DTK_SECOND:
-                               break;
-                           default:
-                               return DTERR_BAD_FORMAT;
-                               break;
-                       }
-                   else if (*cp != '\0')
+                   if (*cp != '.' && *cp != '\0')
                        return DTERR_BAD_FORMAT;
 
                    switch (ptype)
                    {
-                       case DTK_YEAR:
-                           tm->tm_year = value;
-                           tmask = DTK_M(YEAR);
-                           break;
-
-                       case DTK_MONTH:
-
-                           /*
-                            * already have a month and hour? then assume
-                            * minutes
-                            */
-                           if ((fmask & DTK_M(MONTH)) != 0 &&
-                               (fmask & DTK_M(HOUR)) != 0)
-                           {
-                               tm->tm_min = value;
-                               tmask = DTK_M(MINUTE);
-                           }
-                           else
-                           {
-                               tm->tm_mon = value;
-                               tmask = DTK_M(MONTH);
-                           }
-                           break;
-
-                       case DTK_DAY:
-                           tm->tm_mday = value;
-                           tmask = DTK_M(DAY);
-                           break;
-
-                       case DTK_HOUR:
-                           tm->tm_hour = value;
-                           tmask = DTK_M(HOUR);
-                           break;
-
-                       case DTK_MINUTE:
-                           tm->tm_min = value;
-                           tmask = DTK_M(MINUTE);
-                           break;
-
-                       case DTK_SECOND:
-                           tm->tm_sec = value;
-                           tmask = DTK_M(SECOND);
-                           if (*cp == '.')
-                           {
-                               dterr = ParseFractionalSecond(cp, fsec);
-                               if (dterr)
-                                   return dterr;
-                               tmask = DTK_ALL_SECS_M;
-                           }
-                           break;
-
-                       case DTK_TZ:
-                           tmask = DTK_M(TZ);
-                           dterr = DecodeTimezone(field[i], tzp);
-                           if (dterr)
-                               return dterr;
-                           break;
-
                        case DTK_JULIAN:
                            /* previous field was a label for "julian date" */
+                           if (tzp == NULL)
+                               return DTERR_BAD_FORMAT;
                            if (value < 0)
                                return DTERR_FIELD_OVERFLOW;
                            tmask = DTK_DATE_M;
@@ -2378,24 +2218,17 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
 
                    case UNITS:
                        tmask = 0;
+                       /* reject consecutive unhandled units */
+                       if (ptype != 0)
+                           return DTERR_BAD_FORMAT;
                        ptype = val;
                        break;
 
                    case ISOTIME:
                        tmask = 0;
-
-                       /***
-                        * We will need one of the following fields:
-                        *  DTK_NUMBER should be hhmmss.fff
-                        *  DTK_TIME should be hh:mm:ss.fff
-                        *  DTK_DATE should be hhmmss-zz
-                        ***/
-                       if (i >= nf - 1 ||
-                           (ftype[i + 1] != DTK_NUMBER &&
-                            ftype[i + 1] != DTK_TIME &&
-                            ftype[i + 1] != DTK_DATE))
+                       /* reject consecutive unhandled units */
+                       if (ptype != 0)
                            return DTERR_BAD_FORMAT;
-
                        ptype = val;
                        break;
 
@@ -2426,6 +2259,10 @@ DecodeTimeOnly(char **field, int *ftype, int nf,
        fmask |= tmask;
    }                           /* end loop over fields */
 
+   /* reject if prefix type appeared and was never handled */
+   if (ptype != 0)
+       return DTERR_BAD_FORMAT;
+
    /* do final checking/adjustment of Y/M/D fields */
    dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
    if (dterr)
index 4f01131077bf43556fe9e7dac833dea6e2accbf0..e63e5b30fee8e16f979bc5372be6c97e1d41bf35 100644 (file)
@@ -83,6 +83,18 @@ SELECT timestamp with time zone '12/27/2001 04:05:06.789-08';
  Thu Dec 27 04:05:06.789 2001 PST
 (1 row)
 
+SELECT timestamp with time zone '2001-12-27 04:05:06.789 MET DST';
+           timestamptz            
+----------------------------------
+ Wed Dec 26 18:05:06.789 2001 PST
+(1 row)
+
+SELECT timestamp with time zone '2001-12-27 allballs';
+         timestamptz          
+------------------------------
+ Wed Dec 26 16:00:00 2001 PST
+(1 row)
+
 -- should fail in mdy mode:
 SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 ERROR:  date/time field value out of range: "27/12/2001 04:05:06.789-08"
@@ -97,30 +109,6 @@ SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 (1 row)
 
 reset datestyle;
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
-           timestamptz            
-----------------------------------
- Wed Dec 26 12:05:06.789 2001 PST
-(1 row)
-
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789-08';
-           timestamptz            
-----------------------------------
- Thu Dec 27 04:05:06.789 2001 PST
-(1 row)
-
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789+08';
-           timestamptz            
-----------------------------------
- Wed Dec 26 12:05:06.789 2001 PST
-(1 row)
-
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
-           timestamptz            
-----------------------------------
- Thu Dec 27 04:05:06.789 2001 PST
-(1 row)
-
 SELECT timestamp with time zone 'J2452271+08';
          timestamptz          
 ------------------------------
@@ -269,6 +257,23 @@ SELECT time with time zone 'T040506.789 -08';
  04:05:06.789-08
 (1 row)
 
+-- time with time zone should accept a date for DST resolution purposes
+SELECT time with time zone 'T040506.789 America/Los_Angeles';
+ERROR:  invalid input syntax for type time with time zone: "T040506.789 America/Los_Angeles"
+LINE 1: SELECT time with time zone 'T040506.789 America/Los_Angeles'...
+                                   ^
+SELECT time with time zone '2001-12-27 T040506.789 America/Los_Angeles';
+     timetz      
+-----------------
+ 04:05:06.789-08
+(1 row)
+
+SELECT time with time zone 'J2452271 T040506.789 America/Los_Angeles';
+     timetz      
+-----------------
+ 04:05:06.789-08
+(1 row)
+
 SET DateStyle = 'Postgres, MDY';
 -- Check Julian dates BC
 SELECT date 'J1520447' AS "Confucius' Birthday";
@@ -283,6 +288,25 @@ SELECT date 'J0' AS "Julian Epoch";
  11-24-4714 BC
 (1 row)
 
+-- test error on dangling Julian units
+SELECT date '1995-08-06  J J J';
+ERROR:  invalid input syntax for type date: "1995-08-06  J J J"
+LINE 1: SELECT date '1995-08-06  J J J';
+                    ^
+SELECT date 'J J 1520447';
+ERROR:  invalid input syntax for type date: "J J 1520447"
+LINE 1: SELECT date 'J J 1520447';
+                    ^
+-- We used to accept this input style, but it was based on a misreading
+-- of ISO8601, and it was never documented anyway
+SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
+ERROR:  invalid input syntax for type timestamp with time zone: "Y2001M12D27H04M05S06.789+08"
+LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08...
+                                        ^
+SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
+ERROR:  invalid input syntax for type timestamp with time zone: "Y2001M12D27H04MM05S06.789-08"
+LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-0...
+                                        ^
 -- conflicting fields should throw errors
 SELECT date '1995-08-06 epoch';
 ERROR:  invalid input syntax for type date: "1995-08-06 epoch"
index 0676cac5d173e26523ef2edccf98998c03a70355..f7f8c8d2dd9eba2b116a97239a7b3274a5f33353 100644 (file)
@@ -20,15 +20,13 @@ SELECT timestamp with time zone '2001-12-27 04:05:06.789-08';
 SELECT timestamp with time zone '2001.12.27 04:05:06.789-08';
 SELECT timestamp with time zone '2001/12/27 04:05:06.789-08';
 SELECT timestamp with time zone '12/27/2001 04:05:06.789-08';
+SELECT timestamp with time zone '2001-12-27 04:05:06.789 MET DST';
+SELECT timestamp with time zone '2001-12-27 allballs';
 -- should fail in mdy mode:
 SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 set datestyle to dmy;
 SELECT timestamp with time zone '27/12/2001 04:05:06.789-08';
 reset datestyle;
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
-SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789-08';
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789+08';
-SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
 SELECT timestamp with time zone 'J2452271+08';
 SELECT timestamp with time zone 'J2452271-08';
 SELECT timestamp with time zone 'J2452271.5+08';
@@ -57,11 +55,24 @@ SELECT time with time zone 'T040506.789+08';
 SELECT time with time zone 'T040506.789-08';
 SELECT time with time zone 'T040506.789 +08';
 SELECT time with time zone 'T040506.789 -08';
+-- time with time zone should accept a date for DST resolution purposes
+SELECT time with time zone 'T040506.789 America/Los_Angeles';
+SELECT time with time zone '2001-12-27 T040506.789 America/Los_Angeles';
+SELECT time with time zone 'J2452271 T040506.789 America/Los_Angeles';
 SET DateStyle = 'Postgres, MDY';
 -- Check Julian dates BC
 SELECT date 'J1520447' AS "Confucius' Birthday";
 SELECT date 'J0' AS "Julian Epoch";
 
+-- test error on dangling Julian units
+SELECT date '1995-08-06  J J J';
+SELECT date 'J J 1520447';
+
+-- We used to accept this input style, but it was based on a misreading
+-- of ISO8601, and it was never documented anyway
+SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08';
+SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08';
+
 -- conflicting fields should throw errors
 SELECT date '1995-08-06 epoch';
 SELECT date '1995-08-06 infinity';