diff options
author | David Perry <boolean263@protonmail.com> | 2021-01-08 09:18:39 +0000 |
---|---|---|
committer | AndersBroman <a.broman58@gmail.com> | 2021-01-08 09:18:39 +0000 |
commit | b758fdaede7960db9f86786b91c7b23e80210e6f (patch) | |
tree | 5e5d61dc5da72185a540f0abe4efdbf9224c57f6 /wsutil | |
parent | 73cef353bf4bf6ed5b59af2089112e0144fd9b94 (diff) |
Add iso8601_to_nstime() for editcap and nettrace
This adds a function to parse a string date-time in ISO 8601 format into
a `nstime_t` structure. It's based on code from epan/tvbuff.c and
wiretap/nettrace_3gpp_32_423.c and meant to eventually replace both.
(Currently only replaces the latter.)
Since most of Wireshark expects ISO 8601 date-times to fit a fairly
strict pattern, iso8601_to_nstime() currently rejects date-times without
separators between the components, even though ISO 8601 actually permits
this. This could be revisited later.
Also uses iso8601_to_nstime in editcap to parse the -A/-B options,
thus allowing the user to specify a time zone if desired. (See #17110)
Diffstat (limited to 'wsutil')
-rw-r--r-- | wsutil/nstime.c | 189 | ||||
-rw-r--r-- | wsutil/nstime.h | 6 | ||||
-rw-r--r-- | wsutil/time_util.c | 41 | ||||
-rw-r--r-- | wsutil/time_util.h | 12 |
4 files changed, 247 insertions, 1 deletions
diff --git a/wsutil/nstime.c b/wsutil/nstime.c index 16d6e0adcc..a2939a4c30 100644 --- a/wsutil/nstime.c +++ b/wsutil/nstime.c @@ -10,9 +10,12 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ +#include <stdio.h> +#include <string.h> #include <glib.h> #include "nstime.h" #include "epochs.h" +#include "time_util.h" /* this is #defined so that we can clearly see that we have the right number of zeros, rather than as a guard against the number of nanoseconds in a second @@ -265,6 +268,192 @@ nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime) } /* + * function: iso8601_to_nstime + * parses a character string for a date and time given in + * ISO 8601 date-time format (eg: 2014-04-07T05:41:56.782+00:00) + * and converts to an nstime_t + * returns number of chars parsed on success, or 0 on failure + * + * NB. ISO 8601 is actually a lot more flexible than the above format, + * much to a developer's chagrin. The -/T/: separators are technically + * optional. + * Code is here to allow for that, but short-circuited for now since + * our callers assume they're there. + * + * Future improvements could parse other ISO 8601 formats, such as + * YYYY-Www-D, YYYY-DDD, etc. For a relatively easy introduction to + * these formats, see wikipedia: https://en.wikipedia.org/wiki/ISO_8601 + */ +guint8 +iso8601_to_nstime(nstime_t *nstime, const char *ptr) +{ + struct tm tm; + gint n_scanned = 0; + gint n_chars = 0; + guint frac = 0; + gint off_hr = 0; + gint off_min = 0; + guint8 ret_val = 0; + const char *start = ptr; + gboolean has_separator = FALSE; + gboolean have_offset = FALSE; + + memset(&tm, 0, sizeof(tm)); + tm.tm_isdst = -1; + nstime_set_unset(nstime); + + /* There may be 2 or 0 dashes between the date parts */ + has_separator = (*(ptr+4) == '-'); + + /* For now we require the separator to remove ambiguity */ + if (!has_separator) return 0; + + /* Note: sscanf is known to be inconsistent across platforms with respect + to whether a %n is counted as a return value or not, so we use '<'/'>=' + */ + n_scanned = sscanf(ptr, has_separator ? "%4u-%2u-%2u%n" : "%4u%2u%2u%n", + &tm.tm_year, + &tm.tm_mon, + &tm.tm_mday, + &n_chars); + if (n_scanned >= 3) { + /* Got year, month, and day */ + tm.tm_mon--; /* struct tm expects 0-based month */ + tm.tm_year -= 1900; /* struct tm expects number of years since 1900 */ + ptr += n_chars; + } + else { + return 0; + } + + if (*ptr == 'T' || *ptr == ' ') { + /* The 'T' between date and time is optional if the meaning is + unambiguous. We also allow for ' ' here to support formats + such as editcap's -A/-B options */ + ptr++; + } + else { + /* For now we require the separator to remove ambiguity; + remove this entire 'else' when we wish to change that */ + return 0; + } + + /* Now we're on to the time part. We'll require a minimum of hours and + minutes. + Test for a possible ':' */ + has_separator = (*(ptr+2) == ':'); + if (!has_separator) return 0; + + n_scanned = sscanf(ptr, has_separator ? "%2u:%2u%n" : "%2u%2u%n", + &tm.tm_hour, + &tm.tm_min, + &n_chars); + if (n_scanned >= 2) { + ptr += n_chars; + } + else { + /* didn't get hours and minutes */ + return 0; + } + + /* Test for (whole) seconds */ + if ((has_separator && *ptr == ':') || + (!has_separator && g_ascii_isdigit(*ptr))) { + /* Looks like we should have them */ + if (1 > sscanf(ptr, has_separator ? ":%2u%n" : "%2u%n", + &tm.tm_sec, &n_chars)) { + /* Couldn't get them */ + return 0; + } + ptr += n_chars; + + /* Now let's test for fractional seconds */ + if (*ptr == '.' || *ptr == ',') { + /* Get fractional seconds */ + ptr++; + if (1 <= sscanf(ptr, "%u%n", &frac, &n_chars)) { + /* normalize frac to nanoseconds */ + if ((frac >= 1000000000) || (frac == 0)) { + frac = 0; + } else { + switch (n_chars) { /* including leading zeros */ + case 1: frac *= 100000000; break; + case 2: frac *= 10000000; break; + case 3: frac *= 1000000; break; + case 4: frac *= 100000; break; + case 5: frac *= 10000; break; + case 6: frac *= 1000; break; + case 7: frac *= 100; break; + case 8: frac *= 10; break; + default: break; + } + } + ptr += n_chars; + } + /* If we didn't get frac, it's still its default of 0 */ + } + } + else { + tm.tm_sec = 0; + } + + /* Validate what we got so far. mktime() doesn't care about strange + values (and we use this to our advantage when calculating the + time zone offset) but we should at least start with something valid */ + if (!tm_is_valid(&tm)) { + return 0; + } + + /* Check for a time zone offset */ + if (*ptr == '-' || *ptr == '+' || *ptr == 'Z') { + /* We have a UTC-relative offset */ + if (*ptr == 'Z') { + off_hr = off_min = n_scanned = 0; + have_offset = TRUE; + ptr++; + } + else { + has_separator = (*(ptr+3) == ':'); + if (!has_separator) return 0; + n_scanned = sscanf(ptr, has_separator ? "%3d:%2d%n" : "%3d%2d%n", + &off_hr, + &off_min, + &n_chars); + if (n_scanned >= 1) { + /* Definitely got hours */ + have_offset = TRUE; + if (n_scanned >= 2) { + /* Got minutes too */ + ptr += n_chars; + } + else { + /* Only got hours, just move ptr past the +hh or whatever */ + off_min = 0; + ptr += 3; + } + } + else { + /* Didn't get a valid offset, treat as if there's none at all */ + off_hr = off_min = n_scanned = 0; + have_offset = FALSE; + } + } + } + if (have_offset) { + tm.tm_hour -= off_hr; + tm.tm_min -= (off_hr < 0 ? -off_min : off_min); + nstime->secs = mktime_utc(&tm); + } + else { + /* No UTC offset given; ISO 8601 says this means localtime */ + nstime->secs = mktime(&tm); + } + nstime->nsecs = frac; + ret_val = (guint)(ptr-start); + return ret_val; +} + +/* * Editor modelines * * Local Variables: diff --git a/wsutil/nstime.h b/wsutil/nstime.h index 6c50e1fb92..2292b6f811 100644 --- a/wsutil/nstime.h +++ b/wsutil/nstime.h @@ -11,6 +11,7 @@ #ifndef __NSTIME_H__ #define __NSTIME_H__ +#include <glib.h> #include <time.h> #include "ws_symbol_export.h" @@ -122,6 +123,11 @@ WS_DLL_PUBLIC gboolean filetime_to_nstime(nstime_t *nstime, guint64 filetime); FALSE on failure */ WS_DLL_PUBLIC gboolean nsfiletime_to_nstime(nstime_t *nstime, guint64 nsfiletime); +/** parse an ISO 8601 format datetime string to nstime, returns number of + chars parsed on success, 0 on failure. + Note that nstime is set to unset in the case of failure */ +WS_DLL_PUBLIC guint8 iso8601_to_nstime(nstime_t *nstime, const char *ptr); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/wsutil/time_util.c b/wsutil/time_util.c index 98df723a9b..a842acbaac 100644 --- a/wsutil/time_util.c +++ b/wsutil/time_util.c @@ -23,6 +23,9 @@ #include <windows.h> #endif +/* Test if the given year is a leap year */ +#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0)) + /* converts a broken down date representation, relative to UTC, * to a timestamp; it uses timegm() if it's available. * Copied from Glib source gtimer.c @@ -47,7 +50,7 @@ mktime_utc(struct tm *tm) /* count number of leap years */ yr = tm->tm_year + 1900; - if (tm->tm_mon + 1 < 3 && (yr % 4) == 0 && ((yr % 100) != 0 || (yr % 400) == 0)) + if (tm->tm_mon + 1 < 3 && isleap(yr)) yr--; retval += (((yr / 4) - (yr / 100) + (yr / 400)) - 477); /* 477 = ((1970 / 4) - (1970 / 100) + (1970 / 400)) */ @@ -61,6 +64,42 @@ mktime_utc(struct tm *tm) #endif /* !HAVE_TIMEGM */ } +/* Validate the values in a time_t + * Currently checks tm_year, tm_mon, tm_mday, tm_hour, tm_min, and tm_sec; + * disregards tm_wday, tm_yday, and tm_isdst. + * Use this in situations where you wish to return an error rather than + * normalizing invalid dates; otherwise you could specify, for example, + * 2020-10-40 (to quote the macOS and probably *BSD manual + * page for ctime()/localtime()/mktime()/etc., "October 40 + * is changed into November 9"). + */ +gboolean +tm_is_valid(struct tm *tm) +{ + static const gint8 days_in_month[12] = { + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 + }; + + if (tm->tm_mon < 0 || tm->tm_mon > 11) { + return FALSE; + } + if (tm->tm_mday < 0 || tm->tm_mday > + ((tm->tm_mon == 1 && isleap(tm->tm_year)) ? 29 : days_in_month[tm->tm_mon])) { + return FALSE; + } + if (tm->tm_hour < 0 || tm->tm_hour > 23) { + return FALSE; + } + if (tm->tm_min < 0 || tm->tm_min > 59) { + return FALSE; + } + if (tm->tm_sec < 0 || tm->tm_sec > 60) { + /* 60, not 59, to account for leap seconds */ + return FALSE; + } + return TRUE; +} + void get_resource_usage(double *user_time, double *sys_time) { #ifndef _WIN32 struct rusage ru; diff --git a/wsutil/time_util.h b/wsutil/time_util.h index 1cd430dedf..99d97d662e 100644 --- a/wsutil/time_util.h +++ b/wsutil/time_util.h @@ -18,9 +18,21 @@ extern "C" { #include <time.h> +/** Converts a broken down date representation, relative to UTC, + * to a timestamp + */ WS_DLL_PUBLIC time_t mktime_utc(struct tm *tm); +/** Validate the values in a time_t. + * Currently checks tm_year, tm_mon, tm_mday, tm_hour, tm_min, and tm_sec; + * disregards tm_wday, tm_yday, and tm_isdst. + * + * @param tm The struct tm to validate. + */ +WS_DLL_PUBLIC +gboolean tm_is_valid(struct tm *tm); + /** Fetch the process CPU time. * * Fetch the current process user and system CPU times, convert them to |