From c2ecbc9a8262595b27f741e41375d06213a30fb6 Mon Sep 17 00:00:00 2001 From: "J.T. Conklin" Date: Wed, 17 Aug 2011 16:40:49 -0700 Subject: [PATCH] parse-datetime: accept ISO 8601 date and time rep with "T" separator The parser now accepts ISO 8601 date-time strings with "T" as the separator. It has long parsed dates like "2004-02-29 16:21:42" with a space between the date and time strings. Now it also parses "2004-02-29T16:21:42" and fractional-second and time-zone-annotated variants like "2004-02-29T16:21:42.333-07:00". * lib/parse-datetime.y: Parse ISO 8601 extended date and time of day representation using the 'T' separator character. * doc/parse-datetime.texi (General date syntax): Replace use of the deprecated --iso-8601 option with --rfc-3339 in example of date command output formats that can be parsed. * tests/test-parse-datetime.c (tm_diff): New function, taken from lib/parse-datetime.y. (gmt_offset): New function. (main): Add additional test cases to validate ISO 8601 extended date and time of day format parsing. --- ChangeLog | 19 ++++++ doc/parse-datetime.texi | 4 +- lib/parse-datetime.y | 95 ++++++++++++++++++--------- tests/test-parse-datetime.c | 154 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 240 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index 682105717..b112f74d9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2011-08-17 J.T. Conklin + + parse-datetime: accept ISO 8601 date and time rep with "T" separator + The parser now accepts ISO 8601 date-time strings with "T" as the + separator. It has long parsed dates like "2004-02-29 16:21:42" + with a space between the date and time strings. Now it also parses + "2004-02-29T16:21:42" and fractional-second and time-zone-annotated + variants like "2004-02-29T16:21:42.333-07:00". + * lib/parse-datetime.y: Parse ISO 8601 extended date and time + of day representation using the 'T' separator character. 
+ * doc/parse-datetime.texi (General date syntax): replace use of + deprecated --iso-8601 option with --rfc-3339 in example of date + command output formats that can be parsed. + * tests/test-parse-datetime.c (tm_diff): New function, taken from + lib/parse-datetime.y. + (gmt_offset): New function. + (main): Add additional test cases to validate ISO8601 extended + date and time of day parsing. + 2011-08-31 Bruno Haible freopen: Documentation. diff --git a/doc/parse-datetime.texi b/doc/parse-datetime.texi index 2f1ab34ec..b159f9645 100644 --- a/doc/parse-datetime.texi +++ b/doc/parse-datetime.texi @@ -126,8 +126,8 @@ $ LC_ALL=C TZ=UTC0 date Mon Mar 1 00:21:42 UTC 2004 $ TZ=UTC0 date +'%Y-%m-%d %H:%M:%SZ' 2004-03-01 00:21:42Z -$ date --iso-8601=ns | tr T ' ' # --iso-8601 is a GNU extension. -2004-02-29 16:21:42,692722128-0800 +$ date --rfc-3339=ns # --rfc-3339 is a GNU extension. +2004-02-29 16:21:42.692722128-08:00 $ date --rfc-2822 # a GNU extension Sun, 29 Feb 2004 16:21:42 -0800 $ date +'%Y-%m-%d %H:%M:%S %z' # %z is a GNU extension. diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y index 23a9a4159..027f79733 100644 --- a/lib/parse-datetime.y +++ b/lib/parse-datetime.y @@ -285,8 +285,8 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes, %parse-param { parser_control *pc } %lex-param { parser_control *pc } -/* This grammar has 20 shift/reduce conflicts. */ -%expect 20 +/* This grammar has 31 shift/reduce conflicts. 
*/ +%expect 31 %union { @@ -307,7 +307,7 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes, %token tSNUMBER tUNUMBER %token tSDECIMAL_NUMBER tUDECIMAL_NUMBER -%type o_colon_minutes o_merid +%type o_colon_minutes %type seconds signed_seconds unsigned_seconds %type relunit relunit_snumber dayshift @@ -333,7 +333,9 @@ items: ; item: - time + datetime + { pc->times_seen++; pc->dates_seen++; } + | time { pc->times_seen++; } | local_zone { pc->local_zones_seen++; } @@ -348,35 +350,61 @@ item: | hybrid ; +datetime: + iso_8601_datetime + ; + +iso_8601_datetime: + iso_8601_date 'T' iso_8601_time + ; + time: tUNUMBER tMERIDIAN { set_hhmmss (pc, $1.value, 0, 0, 0); pc->meridian = $2; } - | tUNUMBER ':' tUNUMBER o_merid + | tUNUMBER ':' tUNUMBER tMERIDIAN { set_hhmmss (pc, $1.value, $3.value, 0, 0); pc->meridian = $4; } - | tUNUMBER ':' tUNUMBER tSNUMBER o_colon_minutes + | tUNUMBER ':' tUNUMBER ':' unsigned_seconds tMERIDIAN + { + set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec); + pc->meridian = $6; + } + | iso_8601_time + ; + +iso_8601_time: + tUNUMBER zone_offset + { + set_hhmmss (pc, $1.value, 0, 0, 0); + pc->meridian = MER24; + } + | tUNUMBER ':' tUNUMBER o_zone_offset { set_hhmmss (pc, $1.value, $3.value, 0, 0); pc->meridian = MER24; - pc->zones_seen++; - pc->time_zone = time_zone_hhmm (pc, $4, $5); } - | tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_merid + | tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_zone_offset { set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec); - pc->meridian = $6; + pc->meridian = MER24; } - | tUNUMBER ':' tUNUMBER ':' unsigned_seconds tSNUMBER o_colon_minutes + ; + +o_zone_offset: + /* empty */ + | zone_offset + ; + +zone_offset: + tSNUMBER o_colon_minutes { - set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec); - pc->meridian = MER24; pc->zones_seen++; - pc->time_zone = time_zone_hhmm (pc, $6, $7); + pc->time_zone = time_zone_hhmm (pc, $1, $2); } ; @@ -393,12 +421,19 @@ local_zone: } ; +/* Note 'T' is 
a special case, as it is used as the separator in ISO + 8601 date and time of day representation. */ zone: tZONE { pc->time_zone = $1; } + | 'T' + { pc->time_zone = HOUR(7); } | tZONE relunit_snumber { pc->time_zone = $1; apply_relative_time (pc, $2, 1); } + | 'T' relunit_snumber + { pc->time_zone = HOUR(7); + apply_relative_time (pc, $2, 1); } | tZONE tSNUMBER o_colon_minutes { pc->time_zone = $1 + time_zone_hhmm (pc, $2, $3); } | tDAYZONE @@ -456,13 +491,6 @@ date: pc->year = $5; } } - | tUNUMBER tSNUMBER tSNUMBER - { - /* ISO 8601 format. YYYY-MM-DD. */ - pc->year = $1; - pc->month = -$2.value; - pc->day = -$3.value; - } | tUNUMBER tMONTH tSNUMBER { /* e.g. 17-JUN-1992. */ @@ -501,6 +529,17 @@ date: pc->month = $2; pc->year = $3; } + | iso_8601_date + ; + +iso_8601_date: + tUNUMBER tSNUMBER tSNUMBER + { + /* ISO 8601 format. YYYY-MM-DD. */ + pc->year = $1; + pc->month = -$2.value; + pc->day = -$3.value; + } ; rel: @@ -612,13 +651,6 @@ o_colon_minutes: { $$ = $2.value; } ; -o_merid: - /* empty */ - { $$ = MER24; } - | tMERIDIAN - { $$ = $1; } - ; - %% static table const meridian_table[] = @@ -773,7 +805,10 @@ static table const time_zone_table[] = { NULL, 0, 0 } }; -/* Military time zone table. */ +/* Military time zone table. + + Note 'T' is a special case, as it is used as the separator in ISO + 8601 date and time of day representation. */ static table const military_table[] = { { "A", tZONE, -HOUR ( 1) }, @@ -794,7 +829,7 @@ static table const military_table[] = { "Q", tZONE, HOUR ( 4) }, { "R", tZONE, HOUR ( 5) }, { "S", tZONE, HOUR ( 6) }, - { "T", tZONE, HOUR ( 7) }, + { "T", 'T', 0 }, { "U", tZONE, HOUR ( 8) }, { "V", tZONE, HOUR ( 9) }, { "W", tZONE, HOUR (10) }, diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c index 45dbae676..4cb85d5aa 100644 --- a/tests/test-parse-datetime.c +++ b/tests/test-parse-datetime.c @@ -48,17 +48,171 @@ static const char* const day_table[] = NULL }; + +#if ! 
HAVE_TM_GMTOFF
+/* Shift A right by B bits portably, by dividing A by 2**B and
+   truncating towards minus infinity.  A and B should be free of side
+   effects, and B should be in the range 0 <= B <= INT_BITS - 2, where
+   INT_BITS is the number of useful bits in an int.  GNU code can
+   assume that INT_BITS is at least 32.
+
+   ISO C99 says that A >> B is implementation-defined if A < 0.  Some
+   implementations (e.g., UNICOS 9.0 on a Cray Y-MP EL) don't shift
+   right in the usual way when A < 0, so SHR falls back on division if
+   ordinary A >> B doesn't seem to be the usual signed shift.  */
+#define SHR(a, b)       \
+  (-1 >> 1 == -1        \
+   ? (a) >> (b)         \
+   : (a) / (1 << (b)) - ((a) % (1 << (b)) < 0))
+
+#define TM_YEAR_BASE 1900  /* tm_year counts years from this base.  */
+
+/* Yield the difference between *A and *B (that is, *A minus *B),
+   measured in seconds, ignoring leap seconds.  Only the tm_year, tm_yday,
+   tm_hour, tm_min and tm_sec members are read.  The body of this function
+   is taken directly from the GNU C Library; see src/strftime.c.  */
+static long int
+tm_diff (struct tm const *a, struct tm const *b)
+{
+  /* Compute intervening leap days correctly even if year is negative and
+     without int overflow.  a4 is floor ((A's calendar year - 1) / 4).  */
+  int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3);
+  int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3);
+  int a100 = a4 / 25 - (a4 % 25 < 0);   /* floor (a4 / 25) */
+  int b100 = b4 / 25 - (b4 % 25 < 0);   /* floor (b4 / 25) */
+  int a400 = SHR (a100, 2);             /* floor (a100 / 4) */
+  int b400 = SHR (b100, 2);             /* floor (b100 / 4) */
+  int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400);
+  long int ayear = a->tm_year;  /* widen so the subtraction is done in long */
+  long int years = ayear - b->tm_year;
+  long int days = (365 * years + intervening_leap_days
+                   + (a->tm_yday - b->tm_yday));
+  return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour))
+                + (a->tm_min - b->tm_min))
+          + (a->tm_sec - b->tm_sec));
+}
+#endif /* ! 
HAVE_TM_GMTOFF */
+
+/* Return local time minus UTC, in seconds, as in effect at time S.
+   Sample the offset at the tested instant rather than at "now": the
+   two can differ, e.g. when a DST change lies in between.  */
+static long
+gmt_offset (time_t s)
+{
+  long gmtoff;
+
+#if !HAVE_TM_GMTOFF
+  struct tm tm_local = *localtime (&s);
+  struct tm tm_gmt = *gmtime (&s);
+
+  gmtoff = tm_diff (&tm_local, &tm_gmt);
+#else
+  gmtoff = localtime (&s)->tm_gmtoff;
+#endif
+
+  return gmtoff;
+}
+
 int
 main (int argc _GL_UNUSED, char **argv)
 {
   struct timespec result;
   struct timespec result2;
+  struct timespec expected;
   struct timespec now;
   const char *p;
   int i;
+  long gmtoff;
 
   set_program_name (argv[0]);
 
+  /* 1304250918 is 2011-05-01 11:55:18 UTC, the instant tested below.  */
+  gmtoff = gmt_offset (1304250918);
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, local time zone */
+  p = "2011-05-01T11:55:18";
+  expected.tv_sec = 1304250918 - gmtoff;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, local time zone */
+  p = "2011-05-01 11:55:18";
+  expected.tv_sec = 1304250918 - gmtoff;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, UTC */
+  p = "2011-05-01T11:55:18Z";
+  expected.tv_sec = 1304250918;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, UTC */
+  p = "2011-05-01 11:55:18Z";
+  expected.tv_sec = 1304250918;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, w/UTC offset */
+  p = "2011-05-01T11:55:18-07:00";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, w/UTC offset */
+  p = "2011-05-01 11:55:18-07:00";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, w/hour only UTC offset */
+  p = "2011-05-01T11:55:18-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, w/hour only UTC offset */
+  p = "2011-05-01 11:55:18-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
   now.tv_sec = 4711;
   now.tv_nsec = 1267;
   p = "now";
-- 
2.11.0